# install.packages("devtools")
library(devtools)
## Warning: package 'devtools' was built under R version 3.4.1
# source("http://bioconductor.org/biocLite.R")
# biocLite("qvalue")
# install_github("whitlock/OutFLANK")
library(OutFLANK)
## Loading required package: qvalue
if(!("adegenet" %in% installed.packages())){install.packages("adegenet")}
library(vcfR)
##
## ***** *** vcfR *** *****
## This is vcfR 1.5.0
## browseVignettes('vcfR') # Documentation
## citation('vcfR') # Citation
## ***** ***** ***** *****
library(adegenet)
## Loading required package: ade4
## Warning: package 'ade4' was built under R version 3.4.1
##
## /// adegenet 2.0.1 is loaded ////////////
##
## > overview: '?adegenet'
## > tutorials/doc/questions: 'adegenetWeb()'
## > bug reports/feature requests: adegenetIssues()
library(pcadapt)
library(lfmm)
library(LEA)
## devtools::install_github("privefl/bigsnpr")
library(bigsnpr)
## Loading required package: bigstatsr
library(bigstatsr)
library(vcfR)
library(RColorBrewer)
library(ggplot2)
library(fields)
## Loading required package: spam
## Loading required package: grid
## Spam version 1.4-0 (2016-08-29) is loaded.
## Type 'help( Spam)' or 'demo( spam)' for a short introduction
## and overview of this package.
## Help for individual functions is also obtained by adding the
## suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
##
## Attaching package: 'spam'
## The following objects are masked from 'package:base':
##
## backsolve, forwardsolve
## Loading required package: maps
seed <- 1505550948364
vcf <- read.vcfR(paste0("evals/",seed,"_Invers_VCFallsim1.vcf.gz"))
##
Meta line 13 read in.
## All meta lines processed.
## Character matrix gt created.
## Character matrix gt rows: 13011
## Character matrix gt cols: 1009
## skip: 0
## nrows: 13011
## row_num: 0
##
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant: 13011
## All variants processed
ind0 <- read.table(paste0("evals/",seed,"_Invers_outputIndAll.txt"), header=TRUE)
muts <- read.table(paste0("evals/",seed,"_Invers_outputMuts.txt"), header=TRUE)
phen_env <- read.table(paste0("evals/",seed,"_Invers_outputPhenEnv.txt"), header=TRUE)
sim <- system(paste0("grep recomb evals/",seed,"_Invers_outputSim.txt"), TRUE)
muts$pa2 <- round(muts$selCoef^2*muts$freq*(1-muts$freq),3)
muts$prop=NA
muts$prop[muts$type=="m2"] <- muts$pa2[muts$type=="m2"]/sum(muts$pa2[muts$type=="m2"])
which(duplicated(muts$position))
## integer(0)
muts$count <- FALSE
muts$count[muts$prop>=0.01] <- TRUE
muts$count[muts$type=="m4" & muts$freq > 0.05] <- TRUE
muts
## position selCoef originGen type freq pa2 prop count
## 1 51475 0.1688910 5948 m2 0.0070 0.000 0.00000000 FALSE
## 2 51581 -0.3446080 5987 m2 0.0080 0.001 0.01098901 TRUE
## 3 52211 0.0336784 1650 m2 0.8740 0.000 0.00000000 FALSE
## 4 53012 0.2757600 5395 m2 0.0340 0.002 0.02197802 TRUE
## 5 53800 -1.0843600 5999 m2 0.0010 0.001 0.01098901 TRUE
## 6 54966 -0.4380020 172 m2 1.0000 0.000 0.00000000 FALSE
## 7 59393 -0.2582460 5997 m2 0.0005 0.000 0.00000000 FALSE
## 8 65350 -0.4411480 5999 m2 0.0005 0.000 0.00000000 FALSE
## 9 66103 -0.3656770 6000 m2 0.0005 0.000 0.00000000 FALSE
## 10 67017 0.1294500 2702 m2 0.8465 0.002 0.02197802 TRUE
## 11 67058 0.4800990 53 m2 0.5045 0.058 0.63736264 TRUE
## 12 69724 0.2111330 5999 m2 0.0005 0.000 0.00000000 FALSE
## 13 69766 -0.3176070 4318 m2 0.0770 0.007 0.07692308 TRUE
## 14 70769 -0.5431350 5988 m2 0.0080 0.002 0.02197802 TRUE
## 15 73015 0.2998850 5995 m2 0.0020 0.000 0.00000000 FALSE
## 16 73701 0.1616660 1623 m2 1.0000 0.000 0.00000000 FALSE
## 17 77187 -0.4676320 6000 m2 0.0005 0.000 0.00000000 FALSE
## 18 79305 -0.1617850 5975 m2 0.0020 0.000 0.00000000 FALSE
## 19 79805 0.6184820 5999 m2 0.0010 0.000 0.00000000 FALSE
## 20 79995 -0.1745930 3902 m2 0.5485 0.008 0.08791209 TRUE
## 21 81126 0.2498160 5937 m2 0.0270 0.002 0.02197802 TRUE
## 22 85950 -0.1117880 5915 m2 0.0210 0.000 0.00000000 FALSE
## 23 89096 -0.5685820 6000 m2 0.0005 0.000 0.00000000 FALSE
## 24 95659 -0.5738520 5997 m2 0.0010 0.000 0.00000000 FALSE
## 25 103054 0.2102690 4228 m2 0.0650 0.003 0.03296703 TRUE
## 26 105727 0.0738757 4943 m2 0.1240 0.001 0.01098901 TRUE
## 27 110242 -1.1506800 5999 m2 0.0005 0.001 0.01098901 TRUE
## 28 112525 0.0880859 2707 m2 0.5750 0.002 0.02197802 TRUE
## 29 113610 0.9898900 5999 m2 0.0005 0.000 0.00000000 FALSE
## 30 114325 -0.6368310 5996 m2 0.0010 0.000 0.00000000 FALSE
## 31 115003 0.4981430 5996 m2 0.0005 0.000 0.00000000 FALSE
## 32 122439 0.8204620 6000 m2 0.0005 0.000 0.00000000 FALSE
## 33 123604 -0.0105875 4019 m2 0.6595 0.000 0.00000000 FALSE
## 34 129666 -0.1067380 5994 m2 0.0035 0.000 0.00000000 FALSE
## 35 130660 -0.2060310 768 m2 1.0000 0.000 0.00000000 FALSE
## 36 137299 0.9273030 6000 m2 0.0005 0.000 0.00000000 FALSE
## 37 137919 -0.0938825 5800 m2 0.1820 0.001 0.01098901 TRUE
## 38 148281 -0.5184100 5998 m2 0.0005 0.000 0.00000000 FALSE
## 39 175000 0.8000000 5780 m4 1.0000 0.000 NA TRUE
### Color recombination regions ####
lgs <- seq(50000, 500000, by=50000) # linkage groups recombination breakpoints 0.5
lg_whereplot <- lgs - 25000
(recom_rates <- as.numeric(unlist(strsplit(sim[1], " "))[-1]))
## [1] 1.00000e-05 5.00000e-01 1.00000e-05 5.00000e-01 1.00000e-05
## [6] 5.00000e-01 1.00000e-05 5.00000e-01 1.00000e-05 5.00000e-01
## [11] 1.00000e-05 5.00000e-01 1.00000e-05 5.00000e-01 1.00000e-05
## [16] 1.00000e-08 1.00000e-05 5.00000e-01 1.56098e-07 2.91295e-02
## [21] 2.85170e-03 3.92680e-06 2.92384e-05 5.58755e-08 4.77181e-05
## [26] 3.50242e-07 8.43974e-07 1.60727e-05 5.00000e-01 1.00000e-05
(recom_end <- as.integer(unlist(strsplit(sim[2], " "))[-1]))
## [1] 50000 50001 100000 100001 150000 150001 200000 200001 250000 250001
## [11] 300000 300001 350000 350001 370000 380000 400000 400001 400052 413592
## [21] 415805 418228 422879 428034 428082 433429 438943 450000 450001 500000
recom <- data.frame(recom_rates, recom_end)
recom$logrates <- log10(recom_rates)
# plot r=0.5 as black
# plot r=1e-11 as white
(brks <- with(recom, c(-12, -9, -6, -4, -2, 0.5)))
## [1] -12.0 -9.0 -6.0 -4.0 -2.0 0.5
grps <- with(recom, cut(logrates, breaks = brks, include.lowest = TRUE))
nlevels(grps)
## [1] 5
colfunc <- paste(colorRampPalette(colors=c( rgb(0,0,1,0.1), rgb(1,1,1,0), rgb(0,1,0,0.1)))(length(brks)-1), 70, sep="")
recom$col <- colfunc[grps]
plot(recom$logrates, col=recom$col)
### Replace chromsome 1 with actual chromosome positions ####
ends=c(0,lgs)
dim(vcf@gt)
## [1] 13011 1001
vcf@fix[,"CHROM"] <- NA
POS <- as.numeric(vcf@fix[,"POS"])
for (i in 1:(length(ends)-1)){
cond <- POS >= ends[i] & POS < ends[i+1]
print(c(ends[i], ends[i+1], sum(cond)))
vcf@fix[cond,"CHROM"] = i
}
## [1] 0 50000 1333
## [1] 50000 100000 1311
## [1] 100000 150000 1248
## [1] 150000 200000 1272
## [1] 200000 250000 1290
## [1] 250000 300000 1289
## [1] 300000 350000 1377
## [1] 350000 400000 1316
## [1] 400000 450000 1226
## [1] 450000 500000 1349
table(vcf@fix[,"CHROM"])
##
## 1 10 2 3 4 5 6 7 8 9
## 1333 1349 1311 1248 1272 1290 1289 1377 1316 1226
my_ord <- order(as.numeric(vcf@fix[,"POS"]))
vcf2 <- vcf
vcf2 <- vcf[my_ord,]
head(vcf2)
## [1] "***** Object of class 'vcfR' *****"
## [1] "***** Meta section *****"
## [1] "##fileformat=VCFv4.2"
## [1] "##fileDate=20171003"
## [1] "##source=SLiM"
## [1] "##INFO=<ID=MID,Number=1,Type=Integer,Description=\"Mutation ID in SLiM\">"
## [1] "##INFO=<ID=S,Number=1,Type=Float,Description=\"Selection Coefficient\">"
## [1] "##INFO=<ID=DOM,Number=1,Type=Float,Description=\"Dominance\">"
## [1] "First 6 rows."
## [1]
## [1] "***** Fixed section *****"
## CHROM POS ID REF ALT QUAL FILTER
## [1,] "1" "7" NA "A" "T" "1000" "PASS"
## [2,] "1" "19" NA "A" "T" "1000" "PASS"
## [3,] "1" "20" NA "A" "T" "1000" "PASS"
## [4,] "1" "70" NA "A" "T" "1000" "PASS"
## [5,] "1" "97" NA "A" "T" "1000" "PASS"
## [6,] "1" "100" NA "A" "T" "1000" "PASS"
## [1]
## [1] "***** Genotype section *****"
## FORMAT i0 i1 i2 i3 i4
## [1,] "GT" "1|1" "1|1" "1|1" "1|1" "1|1"
## [2,] "GT" "0|0" "0|0" "0|0" "0|0" "0|0"
## [3,] "GT" "0|1" "0|0" "0|1" "0|0" "0|0"
## [4,] "GT" "0|1" "0|0" "1|1" "1|0" "1|0"
## [5,] "GT" "0|0" "0|0" "0|0" "0|0" "0|0"
## [6,] "GT" "1|0" "1|1" "0|0" "1|1" "1|1"
## [1] "First 6 columns only."
## [1]
## [1] "Unique GT formats:"
## [1] "GT"
## [1]
head(vcf)
## [1] "***** Object of class 'vcfR' *****"
## [1] "***** Meta section *****"
## [1] "##fileformat=VCFv4.2"
## [1] "##fileDate=20171003"
## [1] "##source=SLiM"
## [1] "##INFO=<ID=MID,Number=1,Type=Integer,Description=\"Mutation ID in SLiM\">"
## [1] "##INFO=<ID=S,Number=1,Type=Float,Description=\"Selection Coefficient\">"
## [1] "##INFO=<ID=DOM,Number=1,Type=Float,Description=\"Dominance\">"
## [1] "First 6 rows."
## [1]
## [1] "***** Fixed section *****"
## CHROM POS ID REF ALT QUAL FILTER
## [1,] "7" "320001" NA "A" "T" "1000" "PASS"
## [2,] "4" "151786" NA "A" "T" "1000" "PASS"
## [3,] "8" "367566" NA "A" "T" "1000" "PASS"
## [4,] "6" "268461" NA "A" "T" "1000" "PASS"
## [5,] "4" "165067" NA "A" "T" "1000" "PASS"
## [6,] "2" "91930" NA "A" "T" "1000" "PASS"
## [1]
## [1] "***** Genotype section *****"
## FORMAT i0 i1 i2 i3 i4
## [1,] "GT" "0|0" "1|0" "0|0" "1|0" "0|1"
## [2,] "GT" "0|0" "0|0" "1|0" "0|1" "0|1"
## [3,] "GT" "0|0" "0|0" "1|0" "0|0" "0|0"
## [4,] "GT" "0|1" "0|1" "0|0" "1|0" "1|0"
## [5,] "GT" "1|1" "0|1" "0|0" "0|0" "0|0"
## [6,] "GT" "1|0" "0|0" "0|0" "0|0" "0|0"
## [1] "First 6 columns only."
## [1]
## [1] "Unique GT formats:"
## [1] "GT"
## [1]
Plotting functions Inversion is located at 320000 to 330000 bases the inversion “tracker” mutation is located at 320000
plot_r_legend <- function(){
### Plot recombination legend
xl <- 1
yb <- 1
xr <- 1.5
yt <- 2
ncol = length(brks)-1
par(mar=c(5.1,2.5,3.1,0.5))
plot(NA,type="n",ann=FALSE,xlim=c(1,2),ylim=c(1,2),xaxt="n",yaxt="n",bty="n")
mtext("r", side=3, adj=0.2, cex=2)
rect(
xl,
head(seq(yb,yt,(yt-yb)/ncol),-1),
xr,
tail(seq(yb,yt,(yt-yb)/ncol),-1),
col=colfunc
)
mtext(c(paste0("1e",round(brks[1:(length(brks)-1)],1)),0.5),side=2,at=seq(yb,yt,(yt-yb)/(ncol)),las=2,cex=0.7)
}
### Plot function
recom_end2 = c(0, recom_end)
plot_layers <- function(y_head=0, y_arrows=c(1,0.25), ...){
### Plot recombination variation
for (i in 1:nrow(recom))
{
polygon(x = c(recom_end2[i], recom$recom_end[i], recom$recom_end[i], recom_end2[i]),
y = c(-1000, -1000, 1000, 1000),
col=as.character(recom$col[i]), border = NA)
}
polygon(x=c(320000, 330000, 330000, 320000),
y = c(-1000, -1000, 1000, 1000),
col=rgb(1,0,0,0.5), border=NA)
abline(v=lgs)
text(lg_whereplot, y = y_head,
labels = c("LG1\nNeut", "LG2\nQTL", "LG3\nQTL",
"LG4\nSS", "LG5\nBS",
"LG6\nBS", "LG7\n Inversion",
"LG8\nmed r", "LG9\nr var", "LG10\nNeut"))
### Add QTLs and Sweep Location
arrows(muts$position[muts$count], y_arrows[1], muts$position[muts$count], y_arrows[2], col="orange", lwd=muts$prop[muts$count]*20, length = 0.1)
arrows(muts$position[muts$type=="m4"], y_arrows[1], muts$position[muts$type=="m4"], y_arrows[2], col="purple", lwd=2, length = 0.1)
} #end plot function
layout(matrix(1:2,nrow=1),widths=c(0.8,0.2))
par(mar=c(5.1,3.1,3.1,1.9))
plot(0,0, col="white", xlim=c(0, 500000), ylim=c(-1,1), xaxs="i", yaxt="n", ylab="", xlab="Position (bp)")
plot_layers()
plot_r_legend()
# NB: Creates a vcfR object (stored in RAM) which size is twice as big as the original vcf file. So when dealing with large data, make sure you have enough RAM space available before proceeding.
geno <- vcf2@gt[,-c(1)] # Character matrix containing the genotypes
position <- getPOS(vcf2)-1 # Positions in bp, add one to line up with SLIM
which((position) %in% muts$position)
## [1] 1383 1388 1400 1417 1436 1464 1580 1737 1756 1776 1778 1854 1855 1881
## [15] 1954 1974 2047 2097 2108 2113 2145 2270 2364 2545 2717 2795 2904 2967
## [29] 2988 3003 3018 3183 3217 3382 3414 3581 3590 3847 4519
chromosome <- getCHROM(vcf2) # Chromosome information
G <- matrix(NA, nrow = nrow(geno), ncol = ncol(geno))
G[geno %in% c("0/0", "0|0")] <- 0
G[geno %in% c("0/1", "1/0", "1|0", "0|1")] <- 1
G[geno %in% c("1/1", "1|1")] <- 2
## Remove fixed loci or all heterozygotes ####
dim(G)
## [1] 13011 1000
head(G[1:10,1:10])
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] 2 2 2 2 2 2 2 2 2 2
## [2,] 0 0 0 0 0 0 0 0 0 0
## [3,] 1 0 1 0 0 0 0 0 2 1
## [4,] 1 0 2 1 1 2 1 2 2 1
## [5,] 0 0 0 0 0 0 0 0 0 0
## [6,] 1 2 0 2 2 1 2 0 0 1
rem = c(which(rowSums(G)==0), which(rowSums(G-2)==0)) ## fixed loci
position[rem]
## [1] 54966 73701 130660 175000
training <- list(G = G[-rem,], position = position[-rem], chromosome = chromosome[-rem])
vcf_filt <- vcf2[-rem,]
dim(vcf_filt@gt)
## [1] 13007 1001
### optional assignment to pops
toclust <- ind0[,c("x","y")]
d <- dist(toclust)
hc <- hclust(d, method="ward.D")
#fit <- kmeans(toclust, 30)
#plot(ind$x, ind$y, pch=fit$cluster, col=fit$cluster+1)
k <- 39
group <- cutree(hc, k=k)
table(group)
## group
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## 20 25 24 29 29 29 51 21 41 39 42 33 18 24 37 12 21 33 25 27 49 27 30 19 21
## 26 27 28 29 30 31 32 33 34 35 36 37 38 39
## 22 13 36 18 37 13 19 24 13 24 12 9 18 16
(group_env <- sort(round(tapply(ind0$envi, group, mean), 1)))
## 5 33 15 37 14 8 20 34 7 11 23 25 2 19 36
## -1.7 -1.5 -1.4 -1.4 -1.3 -1.2 -1.1 -1.1 -0.9 -0.9 -0.9 -0.9 -0.8 -0.8 -0.8
## 39 35 4 10 17 6 22 9 26 1 24 27 13 16 18
## -0.6 -0.5 -0.4 -0.4 -0.4 -0.3 -0.3 -0.1 -0.1 0.0 0.0 0.1 0.2 0.2 0.3
## 28 29 30 32 3 12 21 38 31
## 0.3 0.4 0.4 0.4 0.5 0.6 0.9 0.9 1.0
(group_table <- data.frame(group=names(group_env), group_env, new_g = 1:39))
## group group_env new_g
## 5 5 -1.7 1
## 33 33 -1.5 2
## 15 15 -1.4 3
## 37 37 -1.4 4
## 14 14 -1.3 5
## 8 8 -1.2 6
## 20 20 -1.1 7
## 34 34 -1.1 8
## 7 7 -0.9 9
## 11 11 -0.9 10
## 23 23 -0.9 11
## 25 25 -0.9 12
## 2 2 -0.8 13
## 19 19 -0.8 14
## 36 36 -0.8 15
## 39 39 -0.6 16
## 35 35 -0.5 17
## 4 4 -0.4 18
## 10 10 -0.4 19
## 17 17 -0.4 20
## 6 6 -0.3 21
## 22 22 -0.3 22
## 9 9 -0.1 23
## 26 26 -0.1 24
## 1 1 0.0 25
## 24 24 0.0 26
## 27 27 0.1 27
## 13 13 0.2 28
## 16 16 0.2 29
## 18 18 0.3 30
## 28 28 0.3 31
## 29 29 0.4 32
## 30 30 0.4 33
## 32 32 0.4 34
## 3 3 0.5 35
## 12 12 0.6 36
## 21 21 0.9 37
## 38 38 0.9 38
## 31 31 1.0 39
ind0$group <- group
ind2 <- merge(ind0, group_table)
head(ind2)
## group id x y phenotype1 envi phenotype2 group_env
## 1 1 0 0.735243 0.184956 0.3655400 -0.06308660 0.3655400 0
## 2 1 578 0.706215 0.198238 -1.0801800 0.12929100 -1.0801800 0
## 3 1 108 0.739526 0.132165 -0.5631400 -0.04879510 -0.5631400 0
## 4 1 285 0.728465 0.184342 -0.0914679 -0.01696810 -0.0914679 0
## 5 1 827 0.733926 0.121190 -0.3357300 -0.00800448 -0.3357300 0
## 6 1 412 0.754067 0.123672 -0.5836690 -0.12760300 -0.5836690 0
## new_g
## 1 25
## 2 25
## 3 25
## 4 25
## 5 25
## 6 25
# the merge puts individuals out of order relative to the genotype matrix
# the id should put them back into order
ind <- ind2[order(ind2$id),]
head(ind)
## group id x y phenotype1 envi phenotype2 group_env
## 1 1 0 0.735243 0.184956 0.365540 -0.0630866 0.365540 0.0
## 23 2 1 0.862102 0.399151 0.224626 -0.8581350 0.224626 -0.8
## 47 3 2 0.698954 0.983423 0.254235 0.5735460 0.254235 0.5
## 79 4 3 0.182200 0.241539 -0.365072 -0.4049860 -0.365072 -0.4
## 104 5 4 0.979798 0.267355 -1.484780 -1.6525600 -1.484780 -1.7
## 129 6 5 0.175654 0.525988 -0.823659 -0.3668130 -0.823659 -0.3
## new_g
## 1 25
## 23 13
## 47 35
## 79 18
## 104 1
## 129 21
par(mfrow=c(1,1))
plot(ind$x, ind$y, pch=ind$group%%6, col=adjustcolor(ind$group%%3+2, alpha=0.2))
text(tapply(ind$x, ind$new_g, mean), tapply(ind$y, ind$new_g, mean), label=1:39)
#write.table(ind, "outputIndAll_pop.txt")
G0<-add_code256(big_copy(t(training$G),type="raw"),code=bigsnpr:::CODE_012)
## Warning: Assignment will down cast from double to raw.
## Hint: To remove this warning, use options(bigstatsr.typecast.warning = FALSE).
#puts it in the raw format and stores likelihood genotype probability
dim(G0)
## [1] 1000 13007
str(G)
## num [1:13011, 1:1000] 2 0 1 1 0 1 0 1 0 0 ...
head(G[,1:10])
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] 2 2 2 2 2 2 2 2 2 2
## [2,] 0 0 0 0 0 0 0 0 0 0
## [3,] 1 0 1 0 0 0 0 0 2 1
## [4,] 1 0 2 1 1 2 1 2 2 1
## [5,] 0 0 0 0 0 0 0 0 0 0
## [6,] 1 2 0 2 2 1 2 0 0 1
newpc<-snp_autoSVD(G=G0,infos.chr = training$chromosome,infos.pos = training$position)
## Phase of clumping at r2 > 0.2.. keep 7570 SNPs.
##
## Iteration 1:
## Computing SVD..
##
## Converged!
# this is doing SNP pruning - removing correlated SNPs
# take snps with highest MAF and correlate snps around it
# Snps with R^2 > 0.2 are removed
# the subset is the indexes of the remaining SNPs
str(newpc)
## List of 7
## $ d : num [1:10] 195 166 164 161 160 ...
## $ u : num [1:1000, 1:10] 0.0262 0.0247 0.038 -0.0185 -0.057 ...
## $ v : num [1:7570, 1:10] 0.01034 0.00224 0.00278 0.00428 -0.00117 ...
## $ niter : int 8
## $ nops : int 156
## $ center: num [1:7570] 1.986 0.001 0.001 0.898 0.009 ...
## $ scale : num [1:7570] 0.1179 0.0316 0.0316 0.7034 0.0947 ...
## - attr(*, "class")= chr "big_SVD"
## - attr(*, "subset")= int [1:7570] 1 5 7 8 9 12 13 15 16 17 ...
## - attr(*, "lrldr")='data.frame': 0 obs. of 3 variables:
## ..$ Chr : int(0)
## ..$ Start: int(0)
## ..$ Stop : int(0)
plot(newpc)
which_pruned <- attr(newpc, which="subset")
length(which_pruned)
## [1] 7570
### Calculate average LD around each SNP ####
# LD <- LD2<- rep(NA, ncol(G0))
# the following is not my most efficient lines of code
# for(i in 26:(ncol(G0)-26)){
# LD[i]=mean(cor(G0[,(i-25):(i+25)])[,25])
# }
layout(matrix(1))
# plot(training$position, abs(LD), pch=20, ylim=c(0,0.18), xaxs="i", col=rgb(0,0,0,0.5), xlab="Position (bp)", ylab="Average LD 50-SNP windows")
# plot_layers(y_head=0.17, y_arrows=c(0.02, 0))
hist(training$position[which_pruned], breaks=seq(0,500000, by=10000), col="lightgrey")
plot_layers(y_head=20, y_arrows=c(40,25))
(inv_index <- which(training$position==(320000)))
## [1] 8279
head(training$position[training$position>319998])
## [1] 320000 320052 320059 320061 320070 320086
inv_all <- which(training$position>320000 & training$position<330000 & rowSums(training$G)>100)
# remove low H individuals
inv_allG <- training$G[inv_all,]
dim(training$G)
## [1] 13007 1000
inv <- training$G[inv_index,]
# Genotype frequencies of inversion marker
table(inv)/(1000)
## inv
## 0 1 2
## 0.406 0.450 0.144
# basically, expect individuals to be of type 0 or type 2
# Allele frequency of inversion marker
1-(sum(inv)/2000)
## [1] 0.631
## Haplotypes of inversion marker
gen <- apply(inv_allG, 1, function(x) (paste(x, collapse = "", sep="")))
nlevels(factor(gen))
## [1] 57
inv_freq <- tapply(inv, ind$new_g, FUN = function(x)(sum(x)/(2*length(x))))
hist(inv_freq)
pop_df <- data.frame(new_g=rownames(inv_freq), inv_freq=inv_freq,
pop_x =tapply(ind$x, ind$new_g, mean),
pop_y = tapply(ind$y, ind$new_g, mean),
pop_envi = tapply(ind$envi, ind$new_g, mean)
)
qplot(x = pop_df$pop_x, y = pop_df$pop_y, colour = pop_df$inv_freq) + theme_bw() + geom_text(aes(x = pop_df$pop_x, y = pop_df$pop_y, label=pop_df$new_g), nudge_y = 0.02)#+ scale_colour_manual(breaks = seq(0,1, by = 0.1) , values = two.colors(n=10,"red", "blue", middle="grey"))
### Let's say only two pops were sampled that happened to differ in freq of inversion
ind_sub <- which(ind$new_g %in% c(15, 32))
dim(G)
## [1] 13011 1000
G_sub <- training$G[, ind_sub]
dim(G_sub)
## [1] 13007 30
FST_sub <- MakeDiploidFSTMat(t(G_sub),locusNames = training$position, ind$new_g[ind_sub])
## Calculating FSTs, may take a few minutes...
## [1] "10000 done of 13007"
plot(FST_sub$LocusName[FST_sub$He>0.1], FST_sub$FST[FST_sub$He>0.1], col=rgb(0,0,0, 0.3), pch=20, ylim=c(0,0.5))
plot_layers(y_head=0.45, y_arrows = c(0.4, 0.35))
(q = quantile( FST_sub$FST[FST_sub$He>0.1], 0.95, na.rm=TRUE))
## 95%
## 0.1708369
points(320000, FST_sub$FST[FST_sub$LocusName==320000])
abline(a=q, b=0, col="grey")
aux<-pcadapt(training$G,K=10)
## Number of SNPs: 13007
## Number of individuals: 1000
str(aux)
## List of 10
## $ scores : num [1:1000, 1:10] 0.0332 -0.012 0.0363 -0.0107 -0.015 ...
## $ singular.values: num [1:10] 11.15 7.27 6.78 6.2 5.54 ...
## $ zscores : num [1:13007, 1:10] 0 0 0.379 -3.048 0 ...
## $ loadings : num [1:13007, 1:10] 0 0 0.128 -0.951 0 ...
## $ maf : num [1:13007] 0.007 0.007 0.164 0.393 0.0005 0.423 0.0005 0.449 0.0045 0.2 ...
## $ missing : num [1:13007] 0 0 0 0 0 0 0 0 0 0 ...
## $ stat : num [1:13007(1d)] NA NA 1.57 22.57 NA ...
## $ gif : num 1.19
## $ chi2.stat : num [1:13007(1d)] NA NA 1.31 18.93 NA ...
## $ pvalues : num [1:13007] NA NA 0.9994 0.0412 NA ...
## - attr(*, "class")= chr "pcadapt"
## - attr(*, "K")= num 10
## - attr(*, "data.type")= chr "genotype"
## - attr(*, "method")= chr "mahalanobis"
## - attr(*, "min.maf")= num 0.05
plot(aux,option="screeplot")
#num <- max(which(training$position<300000))
num <- length(training$position)
x <-pcadapt(training$G[1:num,],K=4)
## Number of SNPs: 13007
## Number of individuals: 1000
summary(x)
## Length Class Mode
## scores 4000 -none- numeric
## singular.values 4 -none- numeric
## zscores 52028 -none- numeric
## loadings 52028 -none- numeric
## maf 13007 -none- numeric
## missing 13007 -none- numeric
## stat 13007 -none- numeric
## gif 1 -none- numeric
## chi2.stat 13007 -none- numeric
## pvalues 13007 -none- numeric
par(mar=c(4,4,1,1))
plot(x$scores[,1], x$scores[,2])
haplotype <- as.factor(training$G[inv_index,])
table(haplotype)
## haplotype
## 0 1 2
## 406 450 144
qplot(x$scores[,1], x$scores[,2], colour=haplotype, main="Individual scores without LD pruning") + scale_colour_manual(values = two.colors(n=3,"red", "blue", middle="grey")) + theme_bw()
#plot_layers(ylim=c(min(x$loadings[,1]), max(x$loadings[,1])), ylab="Loadings PC1")
layout(matrix(c(1,2,3,4,6,5,5,6),nrow=4),widths=c(0.8,0.2))
par(oma=c(3,3,1,0), mar=c(2,2,0,0))
## Top plot
summary(x$loadings[,1])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -10.372078 0.000000 0.000000 0.000195 0.000000 10.372078
plot(training$position,x$loadings[,1], xaxs="i", pch=20, ylim=c(-11, 15))
plot_layers(y_head = 12, y_arrows=c(-8, -11))
## Middle plot
summary(x$loadings[,2])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -13.746337 0.000000 0.000000 0.009282 0.000000 13.746337
plot(training$position,x$loadings[,2], xaxs="i", pch=20, ylim=c(-11, 13))
plot_layers(y_head = 20, y_arrows=c(-8, -11))
## Middle plot
summary(x$loadings[,3])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -16.204653 0.000000 0.000000 -0.005638 0.000000 8.877744
plot(training$position,x$loadings[,3], xaxs="i", pch=20, ylim=c(-15, 15))
plot_layers(y_head = 20, y_arrows=c(-12, -15))
## Bottom plot
summary(x$loadings[,4])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -15.616251 0.000000 0.000000 -0.000401 0.000000 7.853880
plot(training$position,x$loadings[,3], xaxs="i", pch=20, ylim=c(-13, 13))
plot_layers(y_head = 20, y_arrows=c(-10, -13))
## Right plot
plot_r_legend()
mtext("Position (bp)", outer=TRUE, side=1, line=1, adj=0.4)
mtext("Loading on PC axis", outer=TRUE, side=2, line=1, adj=0.5)
par(mfrow=c(1,1))
plot(training$position[1:num], sqrt(x$chi2.stat))
plot_layers(y_head = 20, y_arrows=c(-10, -13))
dim(training$G)
## [1] 13007 1000
aux<-pcadapt(training$G[which_pruned,],K=30)
## Number of SNPs: 7570
## Number of individuals: 1000
str(aux)
## List of 10
## $ scores : num [1:1000, 1:30] 0.0226 0.036 0.0336 -0.0246 -0.0377 ...
## $ singular.values: num [1:30] 4.49 3.82 3.79 3.7 3.67 ...
## $ zscores : num [1:7570, 1:30] 0 0 0 0.907 0 ...
## $ loadings : num [1:7570, 1:30] 0 0 0 0.536 0 ...
## $ maf : num [1:7570] 0.007 0.0005 0.0005 0.449 0.0045 ...
## $ missing : num [1:7570] 0 0 0 0 0 0 0 0 0 0 ...
## $ stat : num [1:7570(1d)] NA NA NA 27.1 NA ...
## $ gif : num 1.1
## $ chi2.stat : num [1:7570(1d)] NA NA NA 24.6 NA ...
## $ pvalues : num [1:7570] NA NA NA 0.745 NA ...
## - attr(*, "class")= chr "pcadapt"
## - attr(*, "K")= num 30
## - attr(*, "data.type")= chr "genotype"
## - attr(*, "method")= chr "mahalanobis"
## - attr(*, "min.maf")= num 0.05
plot(aux,option="screeplot")
x_LD <-pcadapt(training$G[which_pruned,],K=3)
## Number of SNPs: 7570
## Number of individuals: 1000
summary(x_LD)
## Length Class Mode
## scores 3000 -none- numeric
## singular.values 3 -none- numeric
## zscores 22710 -none- numeric
## loadings 22710 -none- numeric
## maf 7570 -none- numeric
## missing 7570 -none- numeric
## stat 7570 -none- numeric
## gif 1 -none- numeric
## chi2.stat 7570 -none- numeric
## pvalues 7570 -none- numeric
par(mar=c(4,4,1,1))
qplot(x_LD$scores[,1], x_LD$scores[,2], colour=ind$envi, main="Individual scores with LD pruning", xlab="PC1 scores", ylab="PC2 scores" ) + theme_bw() + labs(colour="Envi") + scale_color_gradient2( low="blue", mid="white",
high="red", space ="Lab" )
#plot_layers(ylim=c(min(x$loadings[,1]), max(x$loadings[,1])), ylab="Loadings PC1")
layout(matrix(c(1,2,3,3),nrow=2),widths=c(0.8,0.2))
par(oma=c(3,3,1,0), mar=c(2,2,0,0))
## Top plot
summary(x_LD$loadings[,1])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -9.112180 0.000000 0.000000 -0.001167 0.000000 17.344448
plot(training$position[which_pruned],x_LD$loadings[,1], xaxs="i", pch=20, ylim=c(-11, 15))
plot_layers(y_head = 12, y_arrows=c(-5, -11))
## Middle plot
summary(x_LD$loadings[,2])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -5.866679 0.000000 0.000000 0.004825 0.000000 7.513840
plot(training$position[which_pruned],x_LD$loadings[,2], xaxs="i", pch=20, ylim=c(-11, 13))
plot_layers(y_head = 20, y_arrows=c(-8, -11))
## Right plot
plot_r_legend()
mtext("Position (bp)", outer=TRUE, side=1, line=1, adj=0.4)
mtext("Loading on PC axis", outer=TRUE, side=2, line=1, adj=0.5)
par(mfrow=c(1,1))
plot(training$position[which_pruned], sqrt(x_LD$chi2.stat))
plot_layers(y_head = 10, y_arrows=c(2, 0))
write.geno(t(training$G), paste0("temp/",seed,"genotypes.geno"))
## [1] "temp/1505550948364genotypes.geno"
project = snmf(paste0("temp/",seed,"genotypes.geno"),
K = 1:4,
entropy = TRUE,
repetitions = 3,
project = "new")
## The project is saved into :
## temp/1505550948364genotypes.snmfProject
##
## To load the project, use:
## project = load.snmfProject("temp/1505550948364genotypes.snmfProject")
##
## To remove the project, use:
## remove.snmfProject("temp/1505550948364genotypes.snmfProject")
##
## [1] 856455047
## [1] "*************************************"
## [1] "* create.dataset *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 13007
## -s (seed random init) 856455047
## -r (percentage of masked data) 0.05
## -x (genotype file in .geno format) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.geno
## -o (output file in .geno format) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno
##
## Write genotype file with masked data, /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno: OK.
##
## [1] "*************************************"
## [1] "* sNMF K = 1 repetition 1 *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 13007
## -K (number of ancestral pops) 1
## -x (input file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno
## -q (individual admixture file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K1/run1/1505550948364genotypes_r1.1.Q
## -g (ancestral frequencies file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K1/run1/1505550948364genotypes_r1.1.G
## -i (number max of iterations) 200
## -a (regularization parameter) 10
## -s (seed random init) 140338912851847
## -e (tolerance error) 1E-05
## -p (number of processes) 1
## - diploid
##
## Read genotype file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno: OK.
##
##
## Main algorithm:
##
## Least-square error: 2304955.828704
## Write individual ancestry coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K1/run1/1505550948364genotypes_r1.1.Q: OK.
## Write ancestral allele frequency coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K1/run1/1505550948364genotypes_r1.1.G: OK.
##
## [1] "*************************************"
## [1] "* cross-entropy estimation *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 13007
## -K (number of ancestral pops) 1
## -x (genotype file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.geno
## -q (individual admixture) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K1/run1/1505550948364genotypes_r1.1.Q
## -g (ancestral frequencies) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K1/run1/1505550948364genotypes_r1.1.G
## -i (with masked genotypes) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno
## - diploid
##
## Cross-Entropy (all data): 0.306479
## Cross-Entropy (masked data): 0.308396
## The project is saved into :
## temp/1505550948364genotypes.snmfProject
##
## To load the project, use:
## project = load.snmfProject("temp/1505550948364genotypes.snmfProject")
##
## To remove the project, use:
## remove.snmfProject("temp/1505550948364genotypes.snmfProject")
##
## [1] "*************************************"
## [1] "* sNMF K = 2 repetition 1 *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 13007
## -K (number of ancestral pops) 2
## -x (input file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno
## -q (individual admixture file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K2/run1/1505550948364genotypes_r1.2.Q
## -g (ancestral frequencies file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K2/run1/1505550948364genotypes_r1.2.G
## -i (number max of iterations) 200
## -a (regularization parameter) 10
## -s (seed random init) 13798925943681349511
## -e (tolerance error) 1E-05
## -p (number of processes) 1
## - diploid
##
## Read genotype file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno: OK.
##
##
## Main algorithm:
## [ ]
## [========]
## Number of iterations: 21
##
## Least-square error: 2262132.759627
## Write individual ancestry coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K2/run1/1505550948364genotypes_r1.2.Q: OK.
## Write ancestral allele frequency coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K2/run1/1505550948364genotypes_r1.2.G: OK.
##
## [1] "*************************************"
## [1] "* cross-entropy estimation *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 13007
## -K (number of ancestral pops) 2
## -x (genotype file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.geno
## -q (individual admixture) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K2/run1/1505550948364genotypes_r1.2.Q
## -g (ancestral frequencies) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K2/run1/1505550948364genotypes_r1.2.G
## -i (with masked genotypes) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno
## - diploid
##
## Cross-Entropy (all data): 0.300584
## Cross-Entropy (masked data): 0.304192
## The project is saved into :
## temp/1505550948364genotypes.snmfProject
##
## To load the project, use:
## project = load.snmfProject("temp/1505550948364genotypes.snmfProject")
##
## To remove the project, use:
## remove.snmfProject("temp/1505550948364genotypes.snmfProject")
##
## [1] "*************************************"
## [1] "* sNMF K = 3 repetition 1 *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 13007
## -K (number of ancestral pops) 3
## -x (input file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno
## -q (individual admixture file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K3/run1/1505550948364genotypes_r1.3.Q
## -g (ancestral frequencies file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K3/run1/1505550948364genotypes_r1.3.G
## -i (number max of iterations) 200
## -a (regularization parameter) 10
## -s (seed random init) 140338912851847
## -e (tolerance error) 1E-05
## -p (number of processes) 1
## - diploid
##
## Read genotype file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno: OK.
##
##
## Main algorithm:
## [ ]
## [========]
## Number of iterations: 21
##
## Least-square error: 2240540.349344
## Write individual ancestry coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K3/run1/1505550948364genotypes_r1.3.Q: OK.
## Write ancestral allele frequency coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K3/run1/1505550948364genotypes_r1.3.G: OK.
##
## [1] "*************************************"
## [1] "* cross-entropy estimation *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 13007
## -K (number of ancestral pops) 3
## -x (genotype file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.geno
## -q (individual admixture) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K3/run1/1505550948364genotypes_r1.3.Q
## -g (ancestral frequencies) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K3/run1/1505550948364genotypes_r1.3.G
## -i (with masked genotypes) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno
## - diploid
##
## Cross-Entropy (all data): 0.29652
## Cross-Entropy (masked data): 0.301661
## The project is saved into :
## temp/1505550948364genotypes.snmfProject
##
## To load the project, use:
## project = load.snmfProject("temp/1505550948364genotypes.snmfProject")
##
## To remove the project, use:
## remove.snmfProject("temp/1505550948364genotypes.snmfProject")
##
## [1] "*************************************"
## [1] "* sNMF K = 4 repetition 1 *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 13007
## -K (number of ancestral pops) 4
## -x (input file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno
## -q (individual admixture file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K4/run1/1505550948364genotypes_r1.4.Q
## -g (ancestral frequencies file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K4/run1/1505550948364genotypes_r1.4.G
## -i (number max of iterations) 200
## -a (regularization parameter) 10
## -s (seed random init) 4607182419656472455
## -e (tolerance error) 1E-05
## -p (number of processes) 1
## - diploid
##
## Read genotype file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno: OK.
##
##
## Main algorithm:
## [ ]
## [============]
## Number of iterations: 33
##
## Least-square error: 2232949.724775
## Write individual ancestry coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K4/run1/1505550948364genotypes_r1.4.Q: OK.
## Write ancestral allele frequency coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K4/run1/1505550948364genotypes_r1.4.G: OK.
##
## [1] "*************************************"
## [1] "* cross-entropy estimation *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 13007
## -K (number of ancestral pops) 4
## -x (genotype file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.geno
## -q (individual admixture) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K4/run1/1505550948364genotypes_r1.4.Q
## -g (ancestral frequencies) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K4/run1/1505550948364genotypes_r1.4.G
## -i (with masked genotypes) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno
## - diploid
##
## Cross-Entropy (all data): 0.296249
## Cross-Entropy (masked data): 0.301738
## The project is saved into :
## temp/1505550948364genotypes.snmfProject
##
## To load the project, use:
## project = load.snmfProject("temp/1505550948364genotypes.snmfProject")
##
## To remove the project, use:
## remove.snmfProject("temp/1505550948364genotypes.snmfProject")
##
## [1] 967052812
## [1] "*************************************"
## [1] "* create.dataset *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 13007
## -s (seed random init) 967052812
## -r (percentage of masked data) 0.05
## -x (genotype file in .geno format) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.geno
## -o (output file in .geno format) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno
##
## Write genotype file with masked data, /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno: OK.
##
## [1] "*************************************"
## [1] "* sNMF K = 1 repetition 2 *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 13007
## -K (number of ancestral pops) 1
## -x (input file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno
## -q (individual admixture file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K1/run2/1505550948364genotypes_r2.1.Q
## -g (ancestral frequencies file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K1/run2/1505550948364genotypes_r2.1.G
## -i (number max of iterations) 200
## -a (regularization parameter) 10
## -s (seed random init) 140339023449612
## -e (tolerance error) 1E-05
## -p (number of processes) 1
## - diploid
##
## Read genotype file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno: OK.
##
##
## Main algorithm:
##
## Least-square error: 2305329.694964
## Write individual ancestry coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K1/run2/1505550948364genotypes_r2.1.Q: OK.
## Write ancestral allele frequency coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K1/run2/1505550948364genotypes_r2.1.G: OK.
##
## [1] "*************************************"
## [1] "* cross-entropy estimation *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 13007
## -K (number of ancestral pops) 1
## -x (genotype file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.geno
## -q (individual admixture) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K1/run2/1505550948364genotypes_r2.1.Q
## -g (ancestral frequencies) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K1/run2/1505550948364genotypes_r2.1.G
## -i (with masked genotypes) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno
## - diploid
##
## Cross-Entropy (all data): 0.306498
## Cross-Entropy (masked data): 0.308057
## The project is saved into :
## temp/1505550948364genotypes.snmfProject
##
## To load the project, use:
## project = load.snmfProject("temp/1505550948364genotypes.snmfProject")
##
## To remove the project, use:
## remove.snmfProject("temp/1505550948364genotypes.snmfProject")
##
## [1] "*************************************"
## [1] "* sNMF K = 2 repetition 2 *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 13007
## -K (number of ancestral pops) 2
## -x (input file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno
## -q (individual admixture file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K2/run2/1505550948364genotypes_r2.2.Q
## -g (ancestral frequencies file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K2/run2/1505550948364genotypes_r2.2.G
## -i (number max of iterations) 200
## -a (regularization parameter) 10
## -s (seed random init) 140339023449612
## -e (tolerance error) 1E-05
## -p (number of processes) 1
## - diploid
##
## Read genotype file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno: OK.
##
##
## Main algorithm:
## [ ]
## [=========]
## Number of iterations: 25
##
## Least-square error: 2262605.070080
## Write individual ancestry coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K2/run2/1505550948364genotypes_r2.2.Q: OK.
## Write ancestral allele frequency coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K2/run2/1505550948364genotypes_r2.2.G: OK.
##
## [1] "*************************************"
## [1] "* cross-entropy estimation *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 13007
## -K (number of ancestral pops) 2
## -x (genotype file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.geno
## -q (individual admixture) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K2/run2/1505550948364genotypes_r2.2.Q
## -g (ancestral frequencies) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K2/run2/1505550948364genotypes_r2.2.G
## -i (with masked genotypes) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno
## - diploid
##
## Cross-Entropy (all data): 0.300606
## Cross-Entropy (masked data): 0.303705
## The project is saved into :
## temp/1505550948364genotypes.snmfProject
##
## To load the project, use:
## project = load.snmfProject("temp/1505550948364genotypes.snmfProject")
##
## To remove the project, use:
## remove.snmfProject("temp/1505550948364genotypes.snmfProject")
##
## [1] "*************************************"
## [1] "* sNMF K = 3 repetition 2 *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 13007
## -K (number of ancestral pops) 3
## -x (input file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno
## -q (individual admixture file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K3/run2/1505550948364genotypes_r2.3.Q
## -g (ancestral frequencies file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K3/run2/1505550948364genotypes_r2.3.G
## -i (number max of iterations) 200
## -a (regularization parameter) 10
## -s (seed random init) 140339023449612
## -e (tolerance error) 1E-05
## -p (number of processes) 1
## - diploid
##
## Read genotype file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno: OK.
##
##
## Main algorithm:
## [ ]
## [=======]
## Number of iterations: 20
##
## Least-square error: 2240585.210426
## Write individual ancestry coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K3/run2/1505550948364genotypes_r2.3.Q: OK.
## Write ancestral allele frequency coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K3/run2/1505550948364genotypes_r2.3.G: OK.
##
## [1] "*************************************"
## [1] "* cross-entropy estimation *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 13007
## -K (number of ancestral pops) 3
## -x (genotype file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.geno
## -q (individual admixture) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K3/run2/1505550948364genotypes_r2.3.Q
## -g (ancestral frequencies) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K3/run2/1505550948364genotypes_r2.3.G
## -i (with masked genotypes) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno
## - diploid
##
## Cross-Entropy (all data): 0.296549
## Cross-Entropy (masked data): 0.301198
## The project is saved into :
## temp/1505550948364genotypes.snmfProject
##
## To load the project, use:
## project = load.snmfProject("temp/1505550948364genotypes.snmfProject")
##
## To remove the project, use:
## remove.snmfProject("temp/1505550948364genotypes.snmfProject")
##
## [1] "*************************************"
## [1] "* sNMF K = 4 repetition 2 *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 13007
## -K (number of ancestral pops) 4
## -x (input file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno
## -q (individual admixture file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K4/run2/1505550948364genotypes_r2.4.Q
## -g (ancestral frequencies file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K4/run2/1505550948364genotypes_r2.4.G
## -i (number max of iterations) 200
## -a (regularization parameter) 10
## -s (seed random init) 140339023449612
## -e (tolerance error) 1E-05
## -p (number of processes) 1
## - diploid
##
## Read genotype file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno: OK.
##
##
## Main algorithm:
## [ ]
## [===========================]
## Number of iterations: 73
##
## Least-square error: 2231811.742102
## Write individual ancestry coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K4/run2/1505550948364genotypes_r2.4.Q: OK.
## Write ancestral allele frequency coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K4/run2/1505550948364genotypes_r2.4.G: OK.
##
## [1] "*************************************"
## [1] "* cross-entropy estimation *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 13007
## -K (number of ancestral pops) 4
## -x (genotype file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.geno
## -q (individual admixture) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K4/run2/1505550948364genotypes_r2.4.Q
## -g (ancestral frequencies) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K4/run2/1505550948364genotypes_r2.4.G
## -i (with masked genotypes) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno
## - diploid
##
## Cross-Entropy (all data): 0.295804
## Cross-Entropy (masked data): 0.300929
## The project is saved into :
## temp/1505550948364genotypes.snmfProject
##
## To load the project, use:
## project = load.snmfProject("temp/1505550948364genotypes.snmfProject")
##
## To remove the project, use:
## remove.snmfProject("temp/1505550948364genotypes.snmfProject")
##
## [1] 2065672607
## [1] "*************************************"
## [1] "* create.dataset *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 13007
## -s (seed random init) 2065672607
## -r (percentage of masked data) 0.05
## -x (genotype file in .geno format) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.geno
## -o (output file in .geno format) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno
##
## Write genotype file with masked data, /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno: OK.
##
## [1] "*************************************"
## [1] "* sNMF K = 1 repetition 3 *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 13007
## -K (number of ancestral pops) 1
## -x (input file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno
## -q (individual admixture file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K1/run3/1505550948364genotypes_r3.1.Q
## -g (ancestral frequencies file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K1/run3/1505550948364genotypes_r3.1.G
## -i (number max of iterations) 200
## -a (regularization parameter) 10
## -s (seed random init) 140340122069407
## -e (tolerance error) 1E-05
## -p (number of processes) 1
## - diploid
##
## Read genotype file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno: OK.
##
##
## Main algorithm:
##
## Least-square error: 2305370.378477
## Write individual ancestry coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K1/run3/1505550948364genotypes_r3.1.Q: OK.
## Write ancestral allele frequency coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K1/run3/1505550948364genotypes_r3.1.G: OK.
##
## [1] "*************************************"
## [1] "* cross-entropy estimation *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 13007
## -K (number of ancestral pops) 1
## -x (genotype file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.geno
## -q (individual admixture) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K1/run3/1505550948364genotypes_r3.1.Q
## -g (ancestral frequencies) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K1/run3/1505550948364genotypes_r3.1.G
## -i (with masked genotypes) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno
## - diploid
##
## Cross-Entropy (all data): 0.306441
## Cross-Entropy (masked data): 0.309102
## The project is saved into :
## temp/1505550948364genotypes.snmfProject
##
## To load the project, use:
## project = load.snmfProject("temp/1505550948364genotypes.snmfProject")
##
## To remove the project, use:
## remove.snmfProject("temp/1505550948364genotypes.snmfProject")
##
## [1] "*************************************"
## [1] "* sNMF K = 2 repetition 3 *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 13007
## -K (number of ancestral pops) 2
## -x (input file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno
## -q (individual admixture file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K2/run3/1505550948364genotypes_r3.2.Q
## -g (ancestral frequencies file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K2/run3/1505550948364genotypes_r3.2.G
## -i (number max of iterations) 200
## -a (regularization parameter) 10
## -s (seed random init) 140340122069407
## -e (tolerance error) 1E-05
## -p (number of processes) 1
## - diploid
##
## Read genotype file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno: OK.
##
##
## Main algorithm:
## [ ]
## [========]
## Number of iterations: 22
##
## Least-square error: 2262306.059036
## Write individual ancestry coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K2/run3/1505550948364genotypes_r3.2.Q: OK.
## Write ancestral allele frequency coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K2/run3/1505550948364genotypes_r3.2.G: OK.
##
## [1] "*************************************"
## [1] "* cross-entropy estimation *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 13007
## -K (number of ancestral pops) 2
## -x (genotype file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.geno
## -q (individual admixture) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K2/run3/1505550948364genotypes_r3.2.Q
## -g (ancestral frequencies) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K2/run3/1505550948364genotypes_r3.2.G
## -i (with masked genotypes) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno
## - diploid
##
## Cross-Entropy (all data): 0.300557
## Cross-Entropy (masked data): 0.304635
## The project is saved into :
## temp/1505550948364genotypes.snmfProject
##
## To load the project, use:
## project = load.snmfProject("temp/1505550948364genotypes.snmfProject")
##
## To remove the project, use:
## remove.snmfProject("temp/1505550948364genotypes.snmfProject")
##
## [1] "*************************************"
## [1] "* sNMF K = 3 repetition 3 *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 13007
## -K (number of ancestral pops) 3
## -x (input file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno
## -q (individual admixture file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K3/run3/1505550948364genotypes_r3.3.Q
## -g (ancestral frequencies file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K3/run3/1505550948364genotypes_r3.3.G
## -i (number max of iterations) 200
## -a (regularization parameter) 10
## -s (seed random init) 140340122069407
## -e (tolerance error) 1E-05
## -p (number of processes) 1
## - diploid
##
## Read genotype file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno: OK.
##
##
## Main algorithm:
## [ ]
## [=========]
## Number of iterations: 23
##
## Least-square error: 2240585.766695
## Write individual ancestry coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K3/run3/1505550948364genotypes_r3.3.Q: OK.
## Write ancestral allele frequency coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K3/run3/1505550948364genotypes_r3.3.G: OK.
##
## [1] "*************************************"
## [1] "* cross-entropy estimation *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 13007
## -K (number of ancestral pops) 3
## -x (genotype file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.geno
## -q (individual admixture) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K3/run3/1505550948364genotypes_r3.3.Q
## -g (ancestral frequencies) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K3/run3/1505550948364genotypes_r3.3.G
## -i (with masked genotypes) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno
## - diploid
##
## Cross-Entropy (all data): 0.296501
## Cross-Entropy (masked data): 0.302019
## The project is saved into :
## temp/1505550948364genotypes.snmfProject
##
## To load the project, use:
## project = load.snmfProject("temp/1505550948364genotypes.snmfProject")
##
## To remove the project, use:
## remove.snmfProject("temp/1505550948364genotypes.snmfProject")
##
## [1] "*************************************"
## [1] "* sNMF K = 4 repetition 3 *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 13007
## -K (number of ancestral pops) 4
## -x (input file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno
## -q (individual admixture file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K4/run3/1505550948364genotypes_r3.4.Q
## -g (ancestral frequencies file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K4/run3/1505550948364genotypes_r3.4.G
## -i (number max of iterations) 200
## -a (regularization parameter) 10
## -s (seed random init) 6360639903
## -e (tolerance error) 1E-05
## -p (number of processes) 1
## - diploid
##
## Read genotype file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno: OK.
##
##
## Main algorithm:
## [ ]
## [============================]
## Number of iterations: 74
##
## Least-square error: 2231517.964652
## Write individual ancestry coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K4/run3/1505550948364genotypes_r3.4.Q: OK.
## Write ancestral allele frequency coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K4/run3/1505550948364genotypes_r3.4.G: OK.
##
## [1] "*************************************"
## [1] "* cross-entropy estimation *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 13007
## -K (number of ancestral pops) 4
## -x (genotype file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.geno
## -q (individual admixture) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K4/run3/1505550948364genotypes_r3.4.Q
## -g (ancestral frequencies) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/K4/run3/1505550948364genotypes_r3.4.G
## -i (with masked genotypes) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes.snmf/masked/1505550948364genotypes_I.geno
## - diploid
##
## Cross-Entropy (all data): 0.295621
## Cross-Entropy (masked data): 0.301694
## The project is saved into :
## temp/1505550948364genotypes.snmfProject
##
## To load the project, use:
## project = load.snmfProject("temp/1505550948364genotypes.snmfProject")
##
## To remove the project, use:
## remove.snmfProject("temp/1505550948364genotypes.snmfProject")
par(mfrow=c(1,1), mar=c(3,3,3,1))
#project
# plot cross-entropy criterion of all runs of the project
plot(project, cex = 1.2, col = "blue", pch = 19)
# get the cross-entropy of all runs for K = 3
ce = cross.entropy(project, K = 2)
ce
## K = 2
## run 1 0.3041921
## run 2 0.3037046
## run 3 0.3046345
# select the run with the lowest cross-entropy for K = 2
best = which.min(ce)
# display the Q-matrix
Q.matrix <- as.matrix(Q(project, K = 2, run = best))
dim(Q.matrix)
## [1] 1000 2
cluster <- apply(Q.matrix, 1, which.max)
my.colors <- c("tomato", "lightblue", "olivedrab")#, "gold")
ord <- order(ind$envi)
dim(Q.matrix)
## [1] 1000 2
bp <- barplot(t(Q.matrix[ord,]),
border = NA,
space = 0,
col = my.colors,
xlab = "Individuals",
ylab = "Ancestry proportions",
main = "Ancestry matrix")
#axis(1, at = 1:nrow(Q.matrix), labels = bp$order, las = 3, cex.axis = .4)
# get the ancestral genotype frequency matrix, G, for the 2nd run for K = 4.
G.matrix = G(project, K = 3, run = 2)
write.geno(t(training$G[which_pruned,]), paste0("temp/",seed,"genotypes_LD.geno"))
## [1] "temp/1505550948364genotypes_LD.geno"
project_LD = snmf(paste0("temp/",seed,"genotypes_LD.geno"),
K = 1:4,
entropy = TRUE,
repetitions = 3,
project = "new")
## The project is saved into :
## temp/1505550948364genotypes_LD.snmfProject
##
## To load the project, use:
## project = load.snmfProject("temp/1505550948364genotypes_LD.snmfProject")
##
## To remove the project, use:
## remove.snmfProject("temp/1505550948364genotypes_LD.snmfProject")
##
## [1] 819513461
## [1] "*************************************"
## [1] "* create.dataset *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 7570
## -s (seed random init) 819513461
## -r (percentage of masked data) 0.05
## -x (genotype file in .geno format) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.geno
## -o (output file in .geno format) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno
##
## Write genotype file with masked data, /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno: OK.
##
## [1] "*************************************"
## [1] "* sNMF K = 1 repetition 1 *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 7570
## -K (number of ancestral pops) 1
## -x (input file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno
## -q (individual admixture file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K1/run1/1505550948364genotypes_LD_r1.1.Q
## -g (ancestral frequencies file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K1/run1/1505550948364genotypes_LD_r1.1.G
## -i (number max of iterations) 200
## -a (regularization parameter) 10
## -s (seed random init) 69538990197
## -e (tolerance error) 1E-05
## -p (number of processes) 1
## - diploid
##
## Read genotype file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno: OK.
##
##
## Main algorithm:
##
## Least-square error: 1166401.278456
## Write individual ancestry coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K1/run1/1505550948364genotypes_LD_r1.1.Q: OK.
## Write ancestral allele frequency coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K1/run1/1505550948364genotypes_LD_r1.1.G: OK.
##
## [1] "*************************************"
## [1] "* cross-entropy estimation *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 7570
## -K (number of ancestral pops) 1
## -x (genotype file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.geno
## -q (individual admixture) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K1/run1/1505550948364genotypes_LD_r1.1.Q
## -g (ancestral frequencies) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K1/run1/1505550948364genotypes_LD_r1.1.G
## -i (with masked genotypes) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno
## - diploid
##
## Cross-Entropy (all data): 0.268256
## Cross-Entropy (masked data): 0.269889
## The project is saved into :
## temp/1505550948364genotypes_LD.snmfProject
##
## To load the project, use:
## project = load.snmfProject("temp/1505550948364genotypes_LD.snmfProject")
##
## To remove the project, use:
## remove.snmfProject("temp/1505550948364genotypes_LD.snmfProject")
##
## [1] "*************************************"
## [1] "* sNMF K = 2 repetition 1 *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 7570
## -K (number of ancestral pops) 2
## -x (input file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno
## -q (individual admixture file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K2/run1/1505550948364genotypes_LD_r1.2.Q
## -g (ancestral frequencies file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K2/run1/1505550948364genotypes_LD_r1.2.G
## -i (number max of iterations) 200
## -a (regularization parameter) 10
## -s (seed random init) 5114480757
## -e (tolerance error) 1E-05
## -p (number of processes) 1
## - diploid
##
## Read genotype file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno: OK.
##
##
## Main algorithm:
## [ ]
## [======================]
## Number of iterations: 58
##
## Least-square error: 1160246.068115
## Write individual ancestry coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K2/run1/1505550948364genotypes_LD_r1.2.Q: OK.
## Write ancestral allele frequency coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K2/run1/1505550948364genotypes_LD_r1.2.G: OK.
##
## [1] "*************************************"
## [1] "* cross-entropy estimation *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 7570
## -K (number of ancestral pops) 2
## -x (genotype file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.geno
## -q (individual admixture) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K2/run1/1505550948364genotypes_LD_r1.2.Q
## -g (ancestral frequencies) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K2/run1/1505550948364genotypes_LD_r1.2.G
## -i (with masked genotypes) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno
## - diploid
##
## Cross-Entropy (all data): 0.266057
## Cross-Entropy (masked data): 0.268808
## The project is saved into :
## temp/1505550948364genotypes_LD.snmfProject
##
## To load the project, use:
## project = load.snmfProject("temp/1505550948364genotypes_LD.snmfProject")
##
## To remove the project, use:
## remove.snmfProject("temp/1505550948364genotypes_LD.snmfProject")
##
## [1] "*************************************"
## [1] "* sNMF K = 3 repetition 1 *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 7570
## -K (number of ancestral pops) 3
## -x (input file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno
## -q (individual admixture file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K3/run1/1505550948364genotypes_LD_r1.3.Q
## -g (ancestral frequencies file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K3/run1/1505550948364genotypes_LD_r1.3.G
## -i (number max of iterations) 200
## -a (regularization parameter) 10
## -s (seed random init) 140338875910261
## -e (tolerance error) 1E-05
## -p (number of processes) 1
## - diploid
##
## Read genotype file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno: OK.
##
##
## Main algorithm:
## [ ]
## [===========================================================================]
## Number of iterations: 200
##
## Least-square error: 1155945.694014
## Write individual ancestry coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K3/run1/1505550948364genotypes_LD_r1.3.Q: OK.
## Write ancestral allele frequency coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K3/run1/1505550948364genotypes_LD_r1.3.G: OK.
##
## [1] "*************************************"
## [1] "* cross-entropy estimation *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 7570
## -K (number of ancestral pops) 3
## -x (genotype file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.geno
## -q (individual admixture) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K3/run1/1505550948364genotypes_LD_r1.3.Q
## -g (ancestral frequencies) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K3/run1/1505550948364genotypes_LD_r1.3.G
## -i (with masked genotypes) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno
## - diploid
##
## Cross-Entropy (all data): 0.264849
## Cross-Entropy (masked data): 0.268643
## The project is saved into :
## temp/1505550948364genotypes_LD.snmfProject
##
## To load the project, use:
## project = load.snmfProject("temp/1505550948364genotypes_LD.snmfProject")
##
## To remove the project, use:
## remove.snmfProject("temp/1505550948364genotypes_LD.snmfProject")
##
## [1] "*************************************"
## [1] "* sNMF K = 4 repetition 1 *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 7570
## -K (number of ancestral pops) 4
## -x (input file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno
## -q (individual admixture file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K4/run1/1505550948364genotypes_LD_r1.4.Q
## -g (ancestral frequencies file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K4/run1/1505550948364genotypes_LD_r1.4.G
## -i (number max of iterations) 200
## -a (regularization parameter) 10
## -s (seed random init) 9409448053
## -e (tolerance error) 1E-05
## -p (number of processes) 1
## - diploid
##
## Read genotype file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno: OK.
##
##
## Main algorithm:
## [ ]
## [=================================================]
## Number of iterations: 132
##
## Least-square error: 1151566.128324
## Write individual ancestry coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K4/run1/1505550948364genotypes_LD_r1.4.Q: OK.
## Write ancestral allele frequency coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K4/run1/1505550948364genotypes_LD_r1.4.G: OK.
##
## [1] "*************************************"
## [1] "* cross-entropy estimation *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 7570
## -K (number of ancestral pops) 4
## -x (genotype file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.geno
## -q (individual admixture) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K4/run1/1505550948364genotypes_LD_r1.4.Q
## -g (ancestral frequencies) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K4/run1/1505550948364genotypes_LD_r1.4.G
## -i (with masked genotypes) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno
## - diploid
##
## Cross-Entropy (all data): 0.263821
## Cross-Entropy (masked data): 0.268455
## The project is saved into :
## temp/1505550948364genotypes_LD.snmfProject
##
## To load the project, use:
## project = load.snmfProject("temp/1505550948364genotypes_LD.snmfProject")
##
## To remove the project, use:
## remove.snmfProject("temp/1505550948364genotypes_LD.snmfProject")
##
## [1] 926982599
## [1] "*************************************"
## [1] "* create.dataset *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 7570
## -s (seed random init) 926982599
## -r (percentage of masked data) 0.05
## -x (genotype file in .geno format) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.geno
## -o (output file in .geno format) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno
##
## Write genotype file with masked data, /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno: OK.
##
## [1] "*************************************"
## [1] "* sNMF K = 1 repetition 2 *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 7570
## -K (number of ancestral pops) 1
## -x (input file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno
## -q (individual admixture file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K1/run2/1505550948364genotypes_LD_r2.1.Q
## -g (ancestral frequencies file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K1/run2/1505550948364genotypes_LD_r2.1.G
## -i (number max of iterations) 200
## -a (regularization parameter) 10
## -s (seed random init) 140338983379399
## -e (tolerance error) 1E-05
## -p (number of processes) 1
## - diploid
##
## Read genotype file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno: OK.
##
##
## Main algorithm:
##
## Least-square error: 1166551.300678
## Write individual ancestry coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K1/run2/1505550948364genotypes_LD_r2.1.Q: OK.
## Write ancestral allele frequency coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K1/run2/1505550948364genotypes_LD_r2.1.G: OK.
##
## [1] "*************************************"
## [1] "* cross-entropy estimation *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 7570
## -K (number of ancestral pops) 1
## -x (genotype file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.geno
## -q (individual admixture) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K1/run2/1505550948364genotypes_LD_r2.1.Q
## -g (ancestral frequencies) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K1/run2/1505550948364genotypes_LD_r2.1.G
## -i (with masked genotypes) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno
## - diploid
##
## Cross-Entropy (all data): 0.268264
## Cross-Entropy (masked data): 0.269766
## The project is saved into :
## temp/1505550948364genotypes_LD.snmfProject
##
## To load the project, use:
## project = load.snmfProject("temp/1505550948364genotypes_LD.snmfProject")
##
## To remove the project, use:
## remove.snmfProject("temp/1505550948364genotypes_LD.snmfProject")
##
## [1] "*************************************"
## [1] "* sNMF K = 2 repetition 2 *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 7570
## -K (number of ancestral pops) 2
## -x (input file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno
## -q (individual admixture file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K2/run2/1505550948364genotypes_LD_r2.2.Q
## -g (ancestral frequencies file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K2/run2/1505550948364genotypes_LD_r2.2.G
## -i (number max of iterations) 200
## -a (regularization parameter) 10
## -s (seed random init) 140338983379399
## -e (tolerance error) 1E-05
## -p (number of processes) 1
## - diploid
##
## Read genotype file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno: OK.
##
##
## Main algorithm:
## [ ]
## [===================]
## Number of iterations: 51
##
## Least-square error: 1160426.114510
## Write individual ancestry coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K2/run2/1505550948364genotypes_LD_r2.2.Q: OK.
## Write ancestral allele frequency coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K2/run2/1505550948364genotypes_LD_r2.2.G: OK.
##
## [1] "*************************************"
## [1] "* cross-entropy estimation *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 7570
## -K (number of ancestral pops) 2
## -x (genotype file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.geno
## -q (individual admixture) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K2/run2/1505550948364genotypes_LD_r2.2.Q
## -g (ancestral frequencies) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K2/run2/1505550948364genotypes_LD_r2.2.G
## -i (with masked genotypes) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno
## - diploid
##
## Cross-Entropy (all data): 0.266011
## Cross-Entropy (masked data): 0.268871
## The project is saved into :
## temp/1505550948364genotypes_LD.snmfProject
##
## To load the project, use:
## project = load.snmfProject("temp/1505550948364genotypes_LD.snmfProject")
##
## To remove the project, use:
## remove.snmfProject("temp/1505550948364genotypes_LD.snmfProject")
##
## [1] "*************************************"
## [1] "* sNMF K = 3 repetition 2 *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 7570
## -K (number of ancestral pops) 3
## -x (input file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno
## -q (individual admixture file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K3/run2/1505550948364genotypes_LD_r2.3.Q
## -g (ancestral frequencies file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K3/run2/1505550948364genotypes_LD_r2.3.G
## -i (number max of iterations) 200
## -a (regularization parameter) 10
## -s (seed random init) 140338983379399
## -e (tolerance error) 1E-05
## -p (number of processes) 1
## - diploid
##
## Read genotype file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno: OK.
##
##
## Main algorithm:
## [ ]
## [===========================================================================]
## Number of iterations: 200
##
## Least-square error: 1156602.719328
## Write individual ancestry coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K3/run2/1505550948364genotypes_LD_r2.3.Q: OK.
## Write ancestral allele frequency coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K3/run2/1505550948364genotypes_LD_r2.3.G: OK.
##
## [1] "*************************************"
## [1] "* cross-entropy estimation *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 7570
## -K (number of ancestral pops) 3
## -x (genotype file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.geno
## -q (individual admixture) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K3/run2/1505550948364genotypes_LD_r2.3.Q
## -g (ancestral frequencies) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K3/run2/1505550948364genotypes_LD_r2.3.G
## -i (with masked genotypes) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno
## - diploid
##
## Cross-Entropy (all data): 0.264843
## Cross-Entropy (masked data): 0.268781
## The project is saved into :
## temp/1505550948364genotypes_LD.snmfProject
##
## To load the project, use:
## project = load.snmfProject("temp/1505550948364genotypes_LD.snmfProject")
##
## To remove the project, use:
## remove.snmfProject("temp/1505550948364genotypes_LD.snmfProject")
##
## [1] "*************************************"
## [1] "* sNMF K = 4 repetition 2 *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 7570
## -K (number of ancestral pops) 4
## -x (input file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno
## -q (individual admixture file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K4/run2/1505550948364genotypes_LD_r2.4.Q
## -g (ancestral frequencies file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K4/run2/1505550948364genotypes_LD_r2.4.G
## -i (number max of iterations) 200
## -a (regularization parameter) 10
## -s (seed random init) 140338983379399
## -e (tolerance error) 1E-05
## -p (number of processes) 1
## - diploid
##
## Read genotype file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno: OK.
##
##
## Main algorithm:
## [ ]
## [===========================================================================]
## Number of iterations: 200
##
## Least-square error: 1151719.887759
## Write individual ancestry coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K4/run2/1505550948364genotypes_LD_r2.4.Q: OK.
## Write ancestral allele frequency coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K4/run2/1505550948364genotypes_LD_r2.4.G: OK.
##
## [1] "*************************************"
## [1] "* cross-entropy estimation *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 7570
## -K (number of ancestral pops) 4
## -x (genotype file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.geno
## -q (individual admixture) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K4/run2/1505550948364genotypes_LD_r2.4.Q
## -g (ancestral frequencies) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K4/run2/1505550948364genotypes_LD_r2.4.G
## -i (with masked genotypes) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno
## - diploid
##
## Cross-Entropy (all data): 0.263769
## Cross-Entropy (masked data): 0.26852
## The project is saved into :
## temp/1505550948364genotypes_LD.snmfProject
##
## To load the project, use:
## project = load.snmfProject("temp/1505550948364genotypes_LD.snmfProject")
##
## To remove the project, use:
## remove.snmfProject("temp/1505550948364genotypes_LD.snmfProject")
##
## [1] 2105482023
## [1] "*************************************"
## [1] "* create.dataset *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 7570
## -s (seed random init) 2105482023
## -r (percentage of masked data) 0.05
## -x (genotype file in .geno format) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.geno
## -o (output file in .geno format) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno
##
## Write genotype file with masked data, /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno: OK.
##
## [1] "*************************************"
## [1] "* sNMF K = 1 repetition 3 *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 7570
## -K (number of ancestral pops) 1
## -x (input file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno
## -q (individual admixture file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K1/run3/1505550948364genotypes_LD_r3.1.Q
## -g (ancestral frequencies file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K1/run3/1505550948364genotypes_LD_r3.1.G
## -i (number max of iterations) 200
## -a (regularization parameter) 10
## -s (seed random init) 4607182420905499431
## -e (tolerance error) 1E-05
## -p (number of processes) 1
## - diploid
##
## Read genotype file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno: OK.
##
##
## Main algorithm:
##
## Least-square error: 1166845.854356
## Write individual ancestry coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K1/run3/1505550948364genotypes_LD_r3.1.Q: OK.
## Write ancestral allele frequency coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K1/run3/1505550948364genotypes_LD_r3.1.G: OK.
##
## [1] "*************************************"
## [1] "* cross-entropy estimation *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 7570
## -K (number of ancestral pops) 1
## -x (genotype file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.geno
## -q (individual admixture) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K1/run3/1505550948364genotypes_LD_r3.1.Q
## -g (ancestral frequencies) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K1/run3/1505550948364genotypes_LD_r3.1.G
## -i (with masked genotypes) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno
## - diploid
##
## Cross-Entropy (all data): 0.26827
## Cross-Entropy (masked data): 0.26964
## The project is saved into :
## temp/1505550948364genotypes_LD.snmfProject
##
## To load the project, use:
## project = load.snmfProject("temp/1505550948364genotypes_LD.snmfProject")
##
## To remove the project, use:
## remove.snmfProject("temp/1505550948364genotypes_LD.snmfProject")
##
## [1] "*************************************"
## [1] "* sNMF K = 2 repetition 3 *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 7570
## -K (number of ancestral pops) 2
## -x (input file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno
## -q (individual admixture file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K2/run3/1505550948364genotypes_LD_r3.2.Q
## -g (ancestral frequencies file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K2/run3/1505550948364genotypes_LD_r3.2.G
## -i (number max of iterations) 200
## -a (regularization parameter) 10
## -s (seed random init) 140340161878823
## -e (tolerance error) 1E-05
## -p (number of processes) 1
## - diploid
##
## Read genotype file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno: OK.
##
##
## Main algorithm:
## [ ]
## [==================]
## Number of iterations: 49
##
## Least-square error: 1160470.808113
## Write individual ancestry coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K2/run3/1505550948364genotypes_LD_r3.2.Q: OK.
## Write ancestral allele frequency coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K2/run3/1505550948364genotypes_LD_r3.2.G: OK.
##
## [1] "*************************************"
## [1] "* cross-entropy estimation *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 7570
## -K (number of ancestral pops) 2
## -x (genotype file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.geno
## -q (individual admixture) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K2/run3/1505550948364genotypes_LD_r3.2.Q
## -g (ancestral frequencies) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K2/run3/1505550948364genotypes_LD_r3.2.G
## -i (with masked genotypes) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno
## - diploid
##
## Cross-Entropy (all data): 0.266047
## Cross-Entropy (masked data): 0.268541
## The project is saved into :
## temp/1505550948364genotypes_LD.snmfProject
##
## To load the project, use:
## project = load.snmfProject("temp/1505550948364genotypes_LD.snmfProject")
##
## To remove the project, use:
## remove.snmfProject("temp/1505550948364genotypes_LD.snmfProject")
##
## [1] "*************************************"
## [1] "* sNMF K = 3 repetition 3 *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 7570
## -K (number of ancestral pops) 3
## -x (input file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno
## -q (individual admixture file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K3/run3/1505550948364genotypes_LD_r3.3.Q
## -g (ancestral frequencies file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K3/run3/1505550948364genotypes_LD_r3.3.G
## -i (number max of iterations) 200
## -a (regularization parameter) 10
## -s (seed random init) 18446744071520066343
## -e (tolerance error) 1E-05
## -p (number of processes) 1
## - diploid
##
## Read genotype file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno: OK.
##
##
## Main algorithm:
## [ ]
## [=================]
## Number of iterations: 45
##
## Least-square error: 1155857.618156
## Write individual ancestry coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K3/run3/1505550948364genotypes_LD_r3.3.Q: OK.
## Write ancestral allele frequency coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K3/run3/1505550948364genotypes_LD_r3.3.G: OK.
##
## [1] "*************************************"
## [1] "* cross-entropy estimation *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 7570
## -K (number of ancestral pops) 3
## -x (genotype file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.geno
## -q (individual admixture) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K3/run3/1505550948364genotypes_LD_r3.3.Q
## -g (ancestral frequencies) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K3/run3/1505550948364genotypes_LD_r3.3.G
## -i (with masked genotypes) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno
## - diploid
##
## Cross-Entropy (all data): 0.264973
## Cross-Entropy (masked data): 0.268361
## The project is saved into :
## temp/1505550948364genotypes_LD.snmfProject
##
## To load the project, use:
## project = load.snmfProject("temp/1505550948364genotypes_LD.snmfProject")
##
## To remove the project, use:
## remove.snmfProject("temp/1505550948364genotypes_LD.snmfProject")
##
## [1] "*************************************"
## [1] "* sNMF K = 4 repetition 3 *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 7570
## -K (number of ancestral pops) 4
## -x (input file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno
## -q (individual admixture file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K4/run3/1505550948364genotypes_LD_r3.4.Q
## -g (ancestral frequencies file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K4/run3/1505550948364genotypes_LD_r3.4.G
## -i (number max of iterations) 200
## -a (regularization parameter) 10
## -s (seed random init) 140340161878823
## -e (tolerance error) 1E-05
## -p (number of processes) 1
## - diploid
##
## Read genotype file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno: OK.
##
##
## Main algorithm:
## [ ]
## [============================]
## Number of iterations: 76
##
## Least-square error: 1151788.050642
## Write individual ancestry coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K4/run3/1505550948364genotypes_LD_r3.4.Q: OK.
## Write ancestral allele frequency coefficient file /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K4/run3/1505550948364genotypes_LD_r3.4.G: OK.
##
## [1] "*************************************"
## [1] "* cross-entropy estimation *"
## [1] "*************************************"
## summary of the options:
##
## -n (number of individuals) 1000
## -L (number of loci) 7570
## -K (number of ancestral pops) 4
## -x (genotype file) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.geno
## -q (individual admixture) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K4/run3/1505550948364genotypes_LD_r3.4.Q
## -g (ancestral frequencies) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/K4/run3/1505550948364genotypes_LD_r3.4.G
## -i (with masked genotypes) /Users/katie/Desktop/RecombinationGenomeScans/temp/1505550948364genotypes_LD.snmf/masked/1505550948364genotypes_LD_I.geno
## - diploid
##
## Cross-Entropy (all data): 0.263924
## Cross-Entropy (masked data): 0.26824
## The project is saved into :
## temp/1505550948364genotypes_LD.snmfProject
##
## To load the project, use:
## project = load.snmfProject("temp/1505550948364genotypes_LD.snmfProject")
##
## To remove the project, use:
## remove.snmfProject("temp/1505550948364genotypes_LD.snmfProject")
#project
# plot cross-entropy criterion of all runs of the project
plot(project_LD, cex = 1.2, col = "blue", pch = 19)
# get the cross-entropy of all runs for K = 3
ce = cross.entropy(project_LD, K = 2)
ce
## K = 2
## run 1 0.2688083
## run 2 0.2688712
## run 3 0.2685408
# select the run with the lowest cross-entropy for K = 2
best = which.min(ce)
# display the Q-matrix
Q.matrix <- as.matrix(Q(project_LD, K = 2, run = best))
dim(Q.matrix)
## [1] 1000 2
cluster <- apply(Q.matrix, 1, which.max)
my.colors <- c("tomato", "lightblue")#, "olivedrab")#, "gold")
ord <- order(ind$envi)
dim(Q.matrix)
## [1] 1000 2
par(mfrow=c(1,1), mar=c(4,4,3,1))
bp <- barplot(t(Q.matrix[ord,]),
border = NA,
space = 0,
col = my.colors,
xlab = "Individuals (sorted by environment)",
ylab = "Ancestry proportions",
main = "Ancestry matrix")
#axis(1, at = 1:nrow(Q.matrix), labels = bp$order, las = 3, cex.axis = .4)
# get the ancestral genotype frequency matrix, G, for the 2nd run for K = 4.
G.matrix = G(project, K = 3, run = 2)
dim(training$G)
## [1] 13007 1000
FstDataFrame <- MakeDiploidFSTMat(t(training$G),training$position,ind$group)
## Calculating FSTs, may take a few minutes...
## [1] "10000 done of 13007"
head(FstDataFrame)
## LocusName He FST T1 T2 FSTNoCorr
## 1 6 0.0139020 0.003361768 2.338152e-05 0.0069551250 0.02268250
## 2 18 0.0139020 0.003361768 2.338152e-05 0.0069551250 0.02268250
## 3 19 0.2742080 0.008486843 1.164493e-03 0.1372115720 0.02933242
## 4 69 0.4771020 0.024441181 5.837472e-03 0.2388375695 0.04304795
## 5 96 0.0009995 -0.001051497 -5.257326e-07 0.0004999849 0.01856812
## 6 99 0.4881420 0.016059153 3.923439e-03 0.2443117104 0.03599456
## T1NoCorr T2NoCorr meanAlleleFreq
## 1 1.577722e-04 0.0069556776 0.0070
## 2 1.577722e-04 0.0069556776 0.9930
## 3 4.025093e-03 0.1372233360 0.8360
## 4 1.028225e-02 0.2388558483 0.3930
## 5 9.284527e-06 0.0005000253 0.9995
## 6 8.794614e-03 0.2443317427 0.4230
#str(FstDataFrame)
par(mfrow=c(1,1))
plot(FstDataFrame$He, FstDataFrame$FST)
plot(as.numeric(FstDataFrame$LocusName)[FstDataFrame$He>0.1], FstDataFrame$FST[FstDataFrame$He>0.1], ylim=c(0,0.2), pch=19, col=rgb(0,0,0,0.1))
plot_layers(y_head=0.1, y_arrows=c(0.1,0.06))
k <- 39 ## Number of pops
out_ini <- OutFLANK(FstDataFrame, NumberOfSamples=k)
## Run outflank on FST dataframe
#out_ini <- OutFLANK(FstDataFrame[FstDataFrame$He>0.05,], NumberOfSamples=k)
## Run outflank without low He loci
# Plot results to compare chi-squared distribution vs. actual FST distribution
OutFLANKResultsPlotter(out_ini, withOutliers = TRUE,
NoCorr = TRUE, Hmin = 0.1, binwidth = 0.001, Zoom =
FALSE, RightZoomFraction = 0.05, titletext = NULL)
## Poor fit, particularly on right tail
OutFLANKResultsPlotter(out_ini, withOutliers = TRUE,
NoCorr = TRUE, Hmin = 0.1, binwidth = 0.001, Zoom =
TRUE, RightZoomFraction = 0.15, titletext = NULL)
# Histogram of P-values weird
hist(out_ini$results$pvaluesRightTail,breaks = 20)
#### LD Pruning ####
#### Evaluating OutFLANK with pruned data ####
plot(FstDataFrame$He[which_pruned], FstDataFrame$FST[which_pruned])
Fstdf2 <- FstDataFrame[which_pruned,]
dim(Fstdf2)
## [1] 7570 9
Fstdf3 <- Fstdf2[Fstdf2$He>0.05,]
### Trimming without He trimming
out_trim1 <- OutFLANK(Fstdf2, NumberOfSamples=k, Hmin = 0.05)
OutFLANKResultsPlotter(out_trim1, withOutliers = TRUE,
NoCorr = TRUE, Hmin = 0.1, binwidth = 0.001, Zoom =
FALSE, RightZoomFraction = 0.15, titletext = NULL)
out_trim <- OutFLANK(Fstdf3, NumberOfSamples=k)
# I do not think that OutFLANK is removing low H loci correctly
# The fit is much better if I remove these manually than if I do not
head(out_trim$results)
## LocusName He FST T1 T2 FSTNoCorr
## 8 135 0.4947980 0.015079867 0.003734289 0.2476341 0.03474974
## 15 501 0.2647020 0.009371741 0.001241315 0.1324530 0.02864995
## 17 642 0.2983875 0.014339649 0.002141371 0.1493322 0.03398947
## 28 944 0.2528955 0.008735190 0.001105401 0.1265457 0.02890669
## 30 963 0.2239755 0.017437018 0.001954683 0.1120996 0.03626194
## 40 1384 0.4896320 0.014271419 0.003497093 0.2450417 0.03377313
## T1NoCorr T2NoCorr meanAlleleFreq indexOrder GoodH qvalues
## 8 0.008605917 0.2476541 0.5510 1 goodH 0.9808741
## 15 0.003795074 0.1324635 0.8430 2 goodH 0.9808741
## 17 0.005076132 0.1493442 0.1825 3 goodH 0.9808741
## 28 0.003658322 0.1265562 0.8515 4 goodH 0.9808741
## 30 0.004065264 0.1121083 0.8715 5 goodH 0.9808741
## 40 0.008276489 0.2450614 0.5720 6 goodH 0.9808741
## pvalues pvaluesRightTail OutlierFlag
## 8 0.9034933 0.4517466 FALSE
## 15 0.5198422 0.7400789 FALSE
## 17 0.9751864 0.4875932 FALSE
## 28 0.5423314 0.7288343 FALSE
## 30 0.7666788 0.3833394 FALSE
## 40 0.9958455 0.4979228 FALSE
plot(out_trim$results$He, out_trim$results$FST)
OutFLANKResultsPlotter(out_trim, withOutliers = TRUE,
NoCorr = TRUE, Hmin = 0.1, binwidth = 0.001, Zoom =
FALSE, RightZoomFraction = 0.15, titletext = NULL)
OutFLANKResultsPlotter(out_trim, withOutliers = TRUE,
NoCorr = TRUE, Hmin = 0.1, binwidth = 0.001, Zoom =
TRUE, RightZoomFraction = 0.10, titletext = NULL)
# Decent distribution fit, no trimming needed.
hist(out_trim$results$pvaluesRightTail,breaks = 20)
# still a conservative histogram, which is typical of outflank
outliers_crit <- out_trim$results$qvalues<0.2
(outliers_LD <- out_trim$results$LocusName[outliers_crit])
## [1] 41662 53012 64875 67058 79995 81126 141163 202865 291111 293201
## [11] 323034
# outliers identified
### Plot trimmed data only
plot(out_trim$results$LocusName,out_trim$results$FST, ylim=c(0,0.1), xaxs="i")
plot_layers(y_head=0.1, y_arrows=c(0.1,0.06))
points(out_trim$results$LocusName[outliers_crit],
out_trim$results$FST[outliers_crit], pch=19)
### Ad-hoc estimates of p-values for all data
Fstdf_adhoc <- FstDataFrame
new_dist <- FstDataFrame$FSTNoCorr*out_trim$dfInferred/out_trim$FSTNoCorrbar
hist(new_dist)
Fstdf_adhoc$p <- pchisq(new_dist, df = out_trim$dfInferred)
Fstdf_adhoc$p[Fstdf_adhoc$He<0.1] <- NA
hist(Fstdf_adhoc$p)
plot(-log10(1-Fstdf_adhoc$p), Fstdf_adhoc$FST)
Fstdf_adhoc$q <- qvalue(1-Fstdf_adhoc$p)$qvalues
plot(Fstdf_adhoc$q, Fstdf_adhoc$FST)
plot(as.numeric(Fstdf_adhoc$LocusName)[Fstdf_adhoc$He>0.1], Fstdf_adhoc$FST[Fstdf_adhoc$He>0.1], ylim=c(0,0.1), xaxs="i")
plot_layers(y_head=0.1, y_arrows=c(0.09, 0.07))
points(Fstdf_adhoc$LocusName[Fstdf_adhoc$q<0.2], Fstdf_adhoc$FST[Fstdf_adhoc$q<0.2], pch=20)
plot(x, option = "qqplot", threshold = 0.05, main="pcadapt")
#plot(x, option = "stat.distribution")
summary(x$chi2.stat)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.01 1.90 3.36 1907.64 6.02 87583.62 8483
# Default output from PCAdapt
par(mfrow=c(1,1))
plot(training$position, sqrt(x$chi2.stat), col="black", pch=20, main="pcadapt without LD pruning", ylim=c(0,500))
plot_layers(y_head=450, y_arrows = c(50,0))
plot(x_LD, option = "qqplot", threshold = 0.05, main="pcadapt")
#plot(x, option = "stat.distribution")
summary(x_LD$chi2.stat)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.010 1.190 2.366 3.289 4.306 147.442 5300
# Default output from PCAdapt
par(mfrow=c(1,1))
plot(training$position[which_pruned], x_LD$chi2.stat, col="black", pch=20, main="pcadapt without LD pruning", ylim=c(0,100))
plot_layers(y_head=450, y_arrows = c(100,70))
# extract scaled genotypes
scaled.genotype <- scale(as.matrix(t(training$G)))
#scaled.genotype <- as.matrix(t(sim1$G))
# extract scaled phenotypes
phen <- scale(as.matrix(ind$phenotype1))
# centering is important to remove mean
# x <- scale(as.matrix(sim1$phenotype1), center=TRUE, scale=FALSE)
# x <- as.matrix(sim1$phenotype1)
# to do mean and not SD. this might make it possible to get effect sizes
#pc <- prcomp(scaled.genotype,)
#plot(pc$sdev[1:20]^2)
#points(5,pc$sdev[5]^2, type = "h", lwd = 3, col = "blue")
# ridge regression
lfmm.ridge <- lfmm::lfmm_ridge(Y = scaled.genotype, X = phen, K = 3, lambda = 1e-4)
#The lfmm.ridge object contains estimates for the latent variables and for the effect sizes. Here, the estimates are used for computing calibrated significance values and for testing associations between the response matrix Y and the explanatory variable x. It can be done as follows:
lfmm.test <- lfmm::lfmm_test(Y = scaled.genotype, X = phen, lfmm = lfmm.ridge, calibrate = "gif")
p.values <- lfmm.test$calibrated.pvalue
lfmm.test$gif
## [1] 3.388228
hist(p.values, col = "lightgreen", main="LFMM ridge")
qval <- qvalue::qvalue(p.values)
plot(qval)
#The plot suggests that setting fdr.level = 0.025 warrant few false positives.
qval <- qvalue::qvalue(p.values, fdr.level = 0.005)
candidates <- which(qval$significant)
plot(training$position, -log10(p.values), cex = .5, pch = 19, col = "black", main="LFMM ridge", ylim=c(0, 60))
plot_layers(y_head=55, y_arrows=c(10, 0))
#### Effect sizes ridge regression step 1: run lfmm_ridge (or any lfmm model) and get the estimated latent factors from the U matrix (obj.lfmm$U). When lfmm is run with K factors and n individuals, U is an n by K matrix.
step 2: perform a standard linear regression analysis of the phenotype on the SNP frequency (in the direction opposite to LFMM) by adding U as covariate to the model. This will estimate the LFMM effect size for each SNP. The R command should look like this: lm( y ~ . , data = data.frame(genotype[,i], U))
str(lfmm.ridge)
## List of 5
## $ K : num 3
## $ lambda: num 1e-04
## $ U : num [1:1000, 1:3] 10.87 -5.18 12.88 -3.15 -4.62 ...
## $ V : num [1:13007, 1:3] -0.000918 0.000918 0.000393 -0.008504 -0.001573 ...
## $ B : num [1:13007, 1] 0.0469 -0.0469 0.037 0.0174 0.011 ...
## - attr(*, "class")= chr "ridgeLFMM"
m2 <- which(training$position %in% (muts$position[muts$type=="m2" & muts$count==TRUE]))
dim(G)
## [1] 13011 1000
effects <- data.frame(position=training$position[m2], est_coef_ridge=NA, est_coef_PC=NA)
### Try Olivier's suggestion
for (i in 1:length(m2)){
effects$est_coef_ridge[i] <- lm(phen ~., data = data.frame(gen = training$G[m2[i],], lfmm.ridge$U))$coef[2]
}
### Use PC axes as covariates
for (i in 1:length(m2)){
effects$est_coef_PC[i] <- lm(phen ~., data = data.frame(gen = training$G[m2[i],], x_LD$scores))$coef[2]
}
effects
## position est_coef_ridge est_coef_PC
## 1 51581 -1.26567006 -0.54155789
## 2 53012 1.49269679 0.47778962
## 3 53800 -0.68882563 -1.09829457
## 4 67017 0.03244131 0.07870101
## 5 67058 1.00111307 0.69022729
## 6 69766 -1.25495464 -0.59235010
## 7 70769 -1.25212009 -0.43574525
## 8 79995 -0.63273782 -0.19321557
## 9 81126 1.38199308 0.55501515
## 10 103054 1.05941901 0.35069708
## 11 105727 0.46937414 0.03574035
## 12 110242 -3.67957137 -2.78866071
## 13 112525 0.33567171 0.16265880
## 14 137919 -0.43395260 -0.19213616
(new_muts <- merge(muts,effects))
## position selCoef originGen type freq pa2 prop count
## 1 51581 -0.3446080 5987 m2 0.0080 0.001 0.01098901 TRUE
## 2 53012 0.2757600 5395 m2 0.0340 0.002 0.02197802 TRUE
## 3 53800 -1.0843600 5999 m2 0.0010 0.001 0.01098901 TRUE
## 4 67017 0.1294500 2702 m2 0.8465 0.002 0.02197802 TRUE
## 5 67058 0.4800990 53 m2 0.5045 0.058 0.63736264 TRUE
## 6 69766 -0.3176070 4318 m2 0.0770 0.007 0.07692308 TRUE
## 7 70769 -0.5431350 5988 m2 0.0080 0.002 0.02197802 TRUE
## 8 79995 -0.1745930 3902 m2 0.5485 0.008 0.08791209 TRUE
## 9 81126 0.2498160 5937 m2 0.0270 0.002 0.02197802 TRUE
## 10 103054 0.2102690 4228 m2 0.0650 0.003 0.03296703 TRUE
## 11 105727 0.0738757 4943 m2 0.1240 0.001 0.01098901 TRUE
## 12 110242 -1.1506800 5999 m2 0.0005 0.001 0.01098901 TRUE
## 13 112525 0.0880859 2707 m2 0.5750 0.002 0.02197802 TRUE
## 14 137919 -0.0938825 5800 m2 0.1820 0.001 0.01098901 TRUE
## est_coef_ridge est_coef_PC
## 1 -1.26567006 -0.54155789
## 2 1.49269679 0.47778962
## 3 -0.68882563 -1.09829457
## 4 0.03244131 0.07870101
## 5 1.00111307 0.69022729
## 6 -1.25495464 -0.59235010
## 7 -1.25212009 -0.43574525
## 8 -0.63273782 -0.19321557
## 9 1.38199308 0.55501515
## 10 1.05941901 0.35069708
## 11 0.46937414 0.03574035
## 12 -3.67957137 -2.78866071
## 13 0.33567171 0.16265880
## 14 -0.43395260 -0.19213616
plot(new_muts$selCoef, new_muts$est_coef_ridge, abline(0,1), col="blue", pch=19, xlab="True effect size", ylab="Estimated effect size")
points(new_muts$selCoef, new_muts$est_coef_PC, abline(0,1), col="green", pch=15)
#LFMM parameters can alternatively be estimated by solving regularized least-squares mimimization, with lasso penalty as follows.
lfmm.lasso <- lfmm::lfmm_lasso(Y = scaled.genotype, X = phen, K = 3, nozero.prop = 0.02)
## It = 1/100, err2 = 0.999000000053069
## It = 2/100, err2 = 0.992443936559843
## It = 3/100, err2 = 0.992448956476981
## === lambda = 0.272089379187123, no zero B proportion = 0.00238333205197201
## It = 1/100, err2 = 0.992449244390459
## It = 2/100, err2 = 0.992443611515161
## === lambda = 0.259722496977145, no zero B proportion = 0.00276774044745137
## It = 1/100, err2 = 0.992443276991036
## It = 2/100, err2 = 0.992437233935841
## === lambda = 0.247917708649891, no zero B proportion = 0.00315214884293073
## It = 1/100, err2 = 0.992436872552334
## It = 2/100, err2 = 0.992430809570413
## === lambda = 0.236649466170892, no zero B proportion = 0.00338279388021834
## It = 1/100, err2 = 0.992430455598705
## It = 2/100, err2 = 0.992424324472974
## === lambda = 0.225893382703272, no zero B proportion = 0.00392096563388944
## It = 1/100, err2 = 0.992423970653498
## It = 2/100, err2 = 0.992417351879739
## === lambda = 0.215626179829529, no zero B proportion = 0.00522795417851926
## It = 1/100, err2 = 0.99241694474686
## It = 2/100, err2 = 0.992409506261531
## === lambda = 0.205825637172164, no zero B proportion = 0.00668870608134082
## It = 1/100, err2 = 0.992409025379002
## It = 2/100, err2 = 0.992401111444979
## === lambda = 0.196470544304128, no zero B proportion = 0.00799569462597063
## It = 1/100, err2 = 0.992400571751582
## It = 2/100, err2 = 0.992391842262289
## === lambda = 0.187540654845016, no zero B proportion = 0.00961020988698393
## It = 1/100, err2 = 0.992391257156359
## It = 2/100, err2 = 0.992381580774635
## === lambda = 0.17901664264366, no zero B proportion = 0.0117628969016683
## It = 1/100, err2 = 0.992380906011195
## It = 2/100, err2 = 0.992369454530003
## === lambda = 0.170880059952288, no zero B proportion = 0.0141462289536403
## It = 1/100, err2 = 0.992368631542013
## It = 2/100, err2 = 0.992351987408391
## It = 3/100, err2 = 0.992349921758794
## === lambda = 0.163113297501739, no zero B proportion = 0.0172214961174752
## It = 1/100, err2 = 0.99234978725299
## It = 2/100, err2 = 0.992318464074598
## It = 3/100, err2 = 0.992316379116006
## === lambda = 0.155699546391307, no zero B proportion = 0.0216037518259399
#The lfmm.lasso object contains new estimates for the latent variables and for the effect sizes. Note that for lasso, we didn't set the value of a regularization parameter. Instead, we set the proportion of non-null effects (here 2 percent).
lfmm.test <- lfmm::lfmm_test(Y = scaled.genotype, X = phen, lfmm = lfmm.lasso, calibrate = "gif")
p.values <- lfmm.test$calibrated.pvalue
lfmm.test$gif
## [1] 2.687169
hist(p.values, col = "lightblue")
qval <- qvalue::qvalue(p.values)
plot(qval)
qval <- qvalue::qvalue(p.values, fdr.level = 0.005)
candidates <- which(qval$significant)
plot(training$position, -log10(p.values), cex = .5, pch = 19, col = "black", main="LFMM lasso", ylim=c(0, 50), xaxs="i")
plot_layers(y_head=45, y_arrows=c(5,0))
library(rehh)
## Loading required package: rehh.data
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
### Convert vcf@gt to haplotype format .thap
# one file for each chromosome
#SNP1 SNP2 SNP3
#IND1 hap1 A T A
#IND1 hap2 A C T
#IND2 hap1 G C T
#IND2 hap2 A T A
nlociperchrom <- table(vcf@fix[,"CHROM"])
substring("0|1", 1, last=1)
## [1] "0"
substring("0|1", 3, last=3)
## [1] "1"
head(vcf_filt)
## [1] "***** Object of class 'vcfR' *****"
## [1] "***** Meta section *****"
## [1] "##fileformat=VCFv4.2"
## [1] "##fileDate=20171003"
## [1] "##source=SLiM"
## [1] "##INFO=<ID=MID,Number=1,Type=Integer,Description=\"Mutation ID in SLiM\">"
## [1] "##INFO=<ID=S,Number=1,Type=Float,Description=\"Selection Coefficient\">"
## [1] "##INFO=<ID=DOM,Number=1,Type=Float,Description=\"Dominance\">"
## [1] "First 6 rows."
## [1]
## [1] "***** Fixed section *****"
## CHROM POS ID REF ALT QUAL FILTER
## [1,] "1" "7" NA "A" "T" "1000" "PASS"
## [2,] "1" "19" NA "A" "T" "1000" "PASS"
## [3,] "1" "20" NA "A" "T" "1000" "PASS"
## [4,] "1" "70" NA "A" "T" "1000" "PASS"
## [5,] "1" "97" NA "A" "T" "1000" "PASS"
## [6,] "1" "100" NA "A" "T" "1000" "PASS"
## [1]
## [1] "***** Genotype section *****"
## FORMAT i0 i1 i2 i3 i4
## [1,] "GT" "1|1" "1|1" "1|1" "1|1" "1|1"
## [2,] "GT" "0|0" "0|0" "0|0" "0|0" "0|0"
## [3,] "GT" "0|1" "0|0" "0|1" "0|0" "0|0"
## [4,] "GT" "0|1" "0|0" "1|1" "1|0" "1|0"
## [5,] "GT" "0|0" "0|0" "0|0" "0|0" "0|0"
## [6,] "GT" "1|0" "1|1" "0|0" "1|1" "1|1"
## [1] "First 6 columns only."
## [1]
## [1] "Unique GT formats:"
## [1] "GT"
## [1]
rem2 <- which(duplicated(vcf_filt@fix[,"POS"]))
sort(vcf_filt@fix[rem2,"POS"])
## [1] "100878" "104011" "111996" "123483" "12587" "127743" "128827"
## [8] "129938" "130331" "131420" "131651" "134116" "13559" "138688"
## [15] "144856" "14747" "148472" "152404" "15534" "156750" "163099"
## [22] "163461" "167236" "167289" "172799" "178820" "187362" "194105"
## [29] "19414" "199409" "201469" "202057" "203799" "204330" "204588"
## [36] "208737" "209274" "210510" "214954" "215863" "216112" "218783"
## [43] "223313" "224110" "226535" "228151" "23648" "241372" "242446"
## [50] "243145" "247310" "2489" "24899" "250703" "250990" "262897"
## [57] "263557" "264659" "269330" "269691" "27467" "27616" "280934"
## [64] "286410" "290483" "297128" "300889" "302106" "307137" "309410"
## [71] "312748" "312748" "314819" "314925" "315235" "318617" "319178"
## [78] "321220" "321499" "325493" "325644" "328433" "329409" "331733"
## [85] "335201" "335999" "336442" "342570" "346320" "346320" "346644"
## [92] "349640" "352217" "353892" "35594" "356458" "363991" "368855"
## [99] "373487" "375440" "378360" "384195" "384475" "386505" "386722"
## [106] "387167" "391793" "397539" "398342" "39953" "401462" "403569"
## [113] "408133" "409128" "41165" "412681" "414380" "415457" "415719"
## [120] "423089" "423133" "426774" "427028" "437326" "437431" "438833"
## [127] "439863" "445645" "454481" "46069" "461170" "462916" "464914"
## [134] "465274" "466353" "47216" "472243" "472639" "475424" "477029"
## [141] "482652" "483984" "484992" "489377" "489782" "490838" "49160"
## [148] "493373" "495022" "495910" "497929" "57233" "59284" "61671"
## [155] "64217" "70053" "70722" "71016" "71245" "7293" "76650"
## [162] "79040" "80838" "81168" "86386" "88992" "93587"
vcf_filt2 = vcf_filt[-rem2,]
### Get into right format
for (i in 1:length(nlociperchrom)){
keep <- which((vcf_filt2@fix[,"CHROM"]==i))
head(vcf_filt2@gt[keep,1:10], 10)
hap1 <- apply(vcf_filt2@gt[keep,-1], 1, FUN=function(x) substring(x,1,1))
dim(hap1)
#head(hap1[,1:10])
hap2 <- apply(vcf_filt2@gt[keep,-1], 1, FUN=function(x) substring(x,3,3))
#head(hap2[,1:10])
hapt_out <- matrix(NA, nrow=2*1000, ncol=length(keep))
odd <- seq(1,(2*1000), by=2)
even <- odd +1
hapt_out[odd,] <- as.numeric(hap1)
rownames(hapt_out) <- rep("", nrow(hapt_out))
rownames(hapt_out)[odd] <- rownames(hap1)
rownames(hapt_out)[even] <- rownames(hap2)
hapt_out[even,] <- as.numeric(hap2)
#head(hapt_out[,1:10])
write.table(cbind(rownames(hapt_out), hapt_out+1), paste0("temp/",seed,"chrom",i,".thap"), row.names=F, col.names=F, quote = FALSE)
}
#a<- read.table("chrom1.thap")
### Also need to convert map.inp
#Each line contains five columns corresponding to:
#1. the SNP name
#2. the SNP chromosome (or scaffold) of origin
#3. the SNP position on the chromosome (or scaffold). Note that it is up to the user to choose either
#physical or genetic map positions (if available).
#4. the SNP ancestral allele (as coded in the haplotype input file)
#5. the SNP derived alleles (as coded in the haplotype input file)
map <- data.frame(name=1:nrow(vcf_filt2), chrom=as.numeric(vcf_filt2@fix[,"CHROM"]),
pos=as.numeric(vcf_filt2@fix[,"POS"]), anc=1, derived=2)
# setting anc=0 and derived = 1 thinks missing data
head(map)
## name chrom pos anc derived
## 1 1 1 7 1 2
## 2 2 1 19 1 2
## 3 3 1 20 1 2
## 4 4 1 70 1 2
## 5 5 1 97 1 2
## 6 6 1 100 1 2
which(duplicated(map$pos))
## integer(0)
write.table(map, paste0("temp/",seed,"map.inp"), row.names=F, col.names=F)
cnt=0
for(i in 1:length(nlociperchrom)){
cnt=cnt+1
tmp.hapfile=paste0("temp/",seed,"chrom",i,".thap")
tmp.hap=data2haplohh(hap_file=tmp.hapfile, map_file=paste0("temp/",seed,"map.inp"), chr.name=i,haplotype.in.columns=FALSE)
tmp.scan=scan_hh(tmp.hap,threads=4)
if(cnt==1){wgscan=tmp.scan}else{wgscan=rbind(wgscan,tmp.scan)}
}
## Map file seems OK: 1316 SNPs declared for chromosome 1
## Standard rehh input file assumed
## Discard Haplotype with less than 100 % of genotyped SNPs
## No haplotype discarded
## Discard SNPs genotyped on less than 100 % of haplotypes
## No SNP discarded
## Data consists of 2000 haplotypes and 1316 SNPs
## Map file seems OK: 1294 SNPs declared for chromosome 2
## Standard rehh input file assumed
## Discard Haplotype with less than 100 % of genotyped SNPs
## No haplotype discarded
## Discard SNPs genotyped on less than 100 % of haplotypes
## No SNP discarded
## Data consists of 2000 haplotypes and 1294 SNPs
## Map file seems OK: 1233 SNPs declared for chromosome 3
## Standard rehh input file assumed
## Discard Haplotype with less than 100 % of genotyped SNPs
## No haplotype discarded
## Discard SNPs genotyped on less than 100 % of haplotypes
## No SNP discarded
## Data consists of 2000 haplotypes and 1233 SNPs
## Map file seems OK: 1260 SNPs declared for chromosome 4
## Standard rehh input file assumed
## Discard Haplotype with less than 100 % of genotyped SNPs
## No haplotype discarded
## Discard SNPs genotyped on less than 100 % of haplotypes
## No SNP discarded
## Data consists of 2000 haplotypes and 1260 SNPs
## Map file seems OK: 1270 SNPs declared for chromosome 5
## Standard rehh input file assumed
## Discard Haplotype with less than 100 % of genotyped SNPs
## No haplotype discarded
## Discard SNPs genotyped on less than 100 % of haplotypes
## No SNP discarded
## Data consists of 2000 haplotypes and 1270 SNPs
## Map file seems OK: 1278 SNPs declared for chromosome 6
## Standard rehh input file assumed
## Discard Haplotype with less than 100 % of genotyped SNPs
## No haplotype discarded
## Discard SNPs genotyped on less than 100 % of haplotypes
## No SNP discarded
## Data consists of 2000 haplotypes and 1278 SNPs
## Map file seems OK: 1351 SNPs declared for chromosome 7
## Standard rehh input file assumed
## Discard Haplotype with less than 100 % of genotyped SNPs
## No haplotype discarded
## Discard SNPs genotyped on less than 100 % of haplotypes
## No SNP discarded
## Data consists of 2000 haplotypes and 1351 SNPs
## Map file seems OK: 1300 SNPs declared for chromosome 8
## Standard rehh input file assumed
## Discard Haplotype with less than 100 % of genotyped SNPs
## No haplotype discarded
## Discard SNPs genotyped on less than 100 % of haplotypes
## No SNP discarded
## Data consists of 2000 haplotypes and 1300 SNPs
## Map file seems OK: 1209 SNPs declared for chromosome 9
## Standard rehh input file assumed
## Discard Haplotype with less than 100 % of genotyped SNPs
## No haplotype discarded
## Discard SNPs genotyped on less than 100 % of haplotypes
## No SNP discarded
## Data consists of 2000 haplotypes and 1209 SNPs
## Map file seems OK: 1329 SNPs declared for chromosome 10
## Standard rehh input file assumed
## Discard Haplotype with less than 100 % of genotyped SNPs
## No haplotype discarded
## Discard SNPs genotyped on less than 100 % of haplotypes
## No SNP discarded
## Data consists of 2000 haplotypes and 1329 SNPs
dim(wgscan)
## [1] 12840 7
ihs=ihh2ihs(wgscan,minmaf=0.05,freqbin=0.05)
head(ihs$iHS,25)
## CHR POSITION iHS -log10(p-value)
## 3 1 20 NA NA
## 4 1 70 NA NA
## 6 1 100 NA NA
## 8 1 136 NA NA
## 10 1 260 NA NA
## 11 1 285 NA NA
## 15 1 502 NA NA
## 17 1 643 NA NA
## 24 1 842 NA NA
## 25 1 861 NA NA
## 27 1 905 NA NA
## 28 1 945 NA NA
## 30 1 964 NA NA
## 31 1 982 NA NA
## 33 1 1036 NA NA
## 40 1 1385 NA NA
## 42 1 1500 NA NA
## 43 1 1516 NA NA
## 50 1 1937 NA NA
## 51 1 1947 NA NA
## 55 1 2038 NA NA
## 56 1 2088 NA NA
## 60 1 2207 NA NA
## 64 1 2278 NA NA
## 71 1 2415 NA NA
tail(ihs$iHS)
## CHR POSITION iHS -log10(p-value)
## 12803 10 498237 NA NA
## 12804 10 498347 NA NA
## 12809 10 498593 NA NA
## 12815 10 498934 NA NA
## 12825 10 499395 NA NA
## 12831 10 499675 NA NA
ihs$frequency.class
## Number of SNPs mean of the log(iHHA/iHHD) ratio
## 0.05 - 0.1 81 1.10640109
## 0.1 - 0.15 73 0.82773810
## 0.15 - 0.2 73 0.49641375
## 0.2 - 0.25 87 0.34196810
## 0.25 - 0.3 105 0.32007144
## 0.3 - 0.35 104 0.16180145
## 0.35 - 0.4 152 -0.08759543
## 0.4 - 0.45 120 -0.01889971
## 0.45 - 0.5 134 -0.07102058
## 0.5 - 0.55 155 -0.09114274
## 0.55 - 0.6 201 -0.17661796
## 0.6 - 0.65 272 -0.25309133
## 0.65 - 0.7 222 -0.37881039
## 0.7 - 0.75 274 -0.58096588
## 0.75 - 0.8 348 -0.69135457
## 0.8 - 0.85 422 -0.79691182
## 0.85 - 0.9 582 -1.01064003
## 0.9 - 0.95 1079 -1.32595023
## sd of the log(iHHA/iHHD) ratio
## 0.05 - 0.1 0.5250669
## 0.1 - 0.15 0.4895879
## 0.15 - 0.2 0.4143042
## 0.2 - 0.25 0.4236345
## 0.25 - 0.3 0.4136948
## 0.3 - 0.35 0.3455953
## 0.35 - 0.4 0.3876334
## 0.4 - 0.45 0.3935355
## 0.45 - 0.5 0.3526019
## 0.5 - 0.55 0.3859168
## 0.55 - 0.6 0.3616476
## 0.6 - 0.65 0.4468767
## 0.65 - 0.7 0.3850021
## 0.7 - 0.75 0.4842224
## 0.75 - 0.8 0.4284628
## 0.8 - 0.85 0.4311297
## 0.85 - 0.9 0.4899910
## 0.9 - 0.95 0.5832333
#distribplot(ihs$iHS$iHS)
#ihsplot(ihs)
ihs$iHS[which(ihs$iHS[,4]>2),]
## CHR POSITION iHS -log10(p-value)
## 1545 2 58932 -2.590425 2.018373
## 2390 2 91113 3.553271 3.419677
## 3337 3 129544 -2.658302 2.104933
## 4556 4 178967 3.030838 2.612830
## 4882 4 191235 -2.788518 2.276136
## 4933 4 192910 -2.708436 2.170046
## 5907 5 232321 2.907133 2.437995
## 6095 5 239622 2.942344 2.487131
## 8127 7 318097 -2.643086 2.085370
## 8218 7 320805 -2.608879 2.041726
## 8219 7 320901 2.725242 2.192097
## 8621 7 335188 2.694161 2.151403
## 8839 7 344138 -3.308831 3.028323
## 8849 7 344366 -2.682280 2.135951
## 9287 8 361896 3.308501 3.027812
## 10023 8 390942 3.324688 3.052971
## 11064 9 431569 -3.090875 2.699911
## 11101 9 433103 -2.612160 2.045892
## 11167 9 436359 -3.150746 2.788202
## 11215 9 438205 -3.306212 3.024262
## 11219 9 438304 2.820079 2.318652
## 11635 10 454563 -3.214270 2.883469
## 11926 10 465324 2.716292 2.180340
## 11927 10 465328 2.716292 2.180340
## 11938 10 465797 -3.359070 3.106764
## 12221 10 476890 3.000196 2.568948
plot(ihs$iHS$POSITION,ihs$iHS$`-log10(p-value)`, col="grey", pch=20, main="REHH iHS")
plot_layers()
plot(ihs$iHS$POSITION,ihs$iHS$iHS, col="grey", pch=20, main="REHH iHS")
plot_layers()